HPM SDK
HPMicro Software Development Kit
hpm_math.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022,2024 HPMicro
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  */
7 
8 #ifndef __HPM_MATH_H__
9 #define __HPM_MATH_H__
10 
11 #include <stddef.h>
/* Identifier of the Andes hardware DSP backend. HPM_DSP_CORE is compared
 * against this value inside the wrappers further down in this header. */
17 #define HPM_DSP_HW_NDS32 1 /* andes hardware dsp */
18 
/* If CONFIG_HPM_MATH_HAS_EXTRA_CONFIG is defined, its expansion is used as the
 * operand of #include and none of the defaults in the #else branch are set. */
19 #ifdef CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
20 #include CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
21 #else
22 
23 /* Enable Compute Cell Library*/
/* All three library switches are commented out by default. The DSP wrappers
 * in this header are only compiled when HPM_EN_MATH_DSP_LIB is defined. */
24 /* #define HPM_EN_MATH_FFA_LIB */
25 /* #define HPM_EN_MATH_DSP_LIB */
26 /* #define HPM_EN_MATH_NN_LIB */
27 
/* DSP function-group switches. The groups are tested with #ifdef below, so a
 * group is enabled by the macro being defined at all, whatever its value. */
28 #define HPM_MATH_DSP_STATISTICS 1
29 #define HPM_MATH_DSP_BASIC 1
30 #define HPM_MATH_DSP_COMPLEX 1
31 #define HPM_MATH_DSP_CONTROLLER 1
32 #define HPM_MATH_DSP_DISTANCE 1
33 #define HPM_MATH_DSP_FILTERING 1
34 #define HPM_MATH_DSP_MATRIX 1
35 #define HPM_MATH_DSP_SVM 1
36 #define HPM_MATH_DSP_TRANSFORM 1
37 #define HPM_MATH_DSP_UTILS 1
38 #define HPM_MATH_DSP_SORT 1
39 
/* NN function-group switches (their consumers are outside the visible part
 * of this header). */
40 #define HPM_MATH_NN_ACTIVATION 1
41 #define HPM_MATH_NN_TINYENGINE 1
42 #define HPM_MATH_NN_BASIC 1
43 #define HPM_MATH_NN_CONCATENATION 1
44 #define HPM_MATH_NN_CONVOLUTION 1
45 #define HPM_MATH_NN_CONNECTED 1
46 #define HPM_MATH_NN_POOLING 1
47 #define HPM_MATH_NN_SOFTMAX 1
48 #define HPM_MATH_NN_UTIL 1
49 
50 #define HPM_DSP_CORE HPM_DSP_HW_NDS32 /* DSP core selection */
51 
/* Pi as a parenthesised double-precision literal. */
52 #define HPM_MATH_PI (3.14159265358979323846)
53 
/* Defined with an empty expansion; its use is not visible in this part of the
 * header. */
62 #define HPM_MATH_SW_FFT_CHECKLIST
63 
64 #endif
65 
66 #ifdef __cplusplus
67 extern "C"
68 {
69 #endif
70 
71 #ifdef HPM_MATH_DSP_STATISTICS
72 
79 #ifdef HPM_EN_MATH_DSP_LIB
80 
81 #ifdef __zcc__
82 #include "tpt_math.h"
83 #endif
84 
85 #include "riscv_dsp_statistics_math.h"
86 
87 // Maximum
95 static inline float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
96 {
97 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
98 #ifdef __zcc__
99  f32_t res;
100  tpt_max_f32(&res, index, src, size);
101  return res;
102 #else
103  return riscv_dsp_max_f32(src, size, index);
104 #endif
105 #endif
106 }
107 static inline float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
108 {
109 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
110  return riscv_dsp_max_val_f32(src, size);
111 #endif
112 }
120 static inline q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
121 {
122 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
123 #ifdef __zcc__
124  q15_t res;
125  tpt_max_q15(&res, index, src, size);
126  return res;
127 #else
128  return riscv_dsp_max_q15(src, size, index);
129 #endif
130 #endif
131 }
132 
140 static inline q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
141 {
142 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
143  #ifdef __zcc__
144  q31_t res;
145  tpt_max_q31(&res, index, src, size);
146  return res;
147 #else
148  return riscv_dsp_max_q31(src, size, index);
149 #endif
150 #endif
151 }
152 
160 static inline q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
161 {
162 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
163 #ifdef __zcc__
164  q7_t res;
165  tpt_max_q7(&res, index, src, size);
166  return res;
167 #else
168  return riscv_dsp_max_q7(src, size, index);
169 #endif
170 #endif
171 }
172 
/**
 * @brief Maximum of a uint8_t buffer (DSP backend wrapper).
 *
 * Always forwards to riscv_dsp_max_u8(); there is no __zcc__ variant.
 */
static inline uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
{
#if HPM_DSP_CORE == HPM_DSP_HW_NDS32
    return riscv_dsp_max_u8(src, size, index);
#endif
}
186 
187 // Minimum
195 static inline float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
196 {
197 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
198 #ifdef __zcc__
199  f32_t res;
200  tpt_min_f32(&res, index, src, size);
201  return res;
202 #else
203  return riscv_dsp_min_f32(src, size, index);
204 #endif
205 #endif
206 }
207 
215 static inline q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
216 {
217 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
218 #ifdef __zcc__
219  q15_t res;
220  tpt_min_q15(&res, index, src, size);
221  return res;
222 #else
223  return riscv_dsp_min_q15(src, size, index);
224 #endif
225 #endif
226 }
227 
235 static inline q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
236 {
237 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
238 #ifdef __zcc__
239  q31_t res;
240  tpt_min_q31(&res, index, src, size);
241  return res;
242 #else
243  return riscv_dsp_min_q31(src, size, index);
244 #endif
245 #endif
246 }
247 
255 static inline q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
256 {
257 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
258 #ifdef __zcc__
259  q7_t res;
260  tpt_min_q7(&res, index, src, size);
261  return res;
262 #else
263  return riscv_dsp_min_q7(src, size, index);
264 #endif
265 #endif
266 }
267 
/**
 * @brief Minimum of a uint8_t buffer (DSP backend wrapper).
 *
 * Always forwards to riscv_dsp_min_u8(); there is no __zcc__ variant.
 */
static inline uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
{
#if HPM_DSP_CORE == HPM_DSP_HW_NDS32
    return riscv_dsp_min_u8(src, size, index);
#endif
}
281 
282 // Mean
289 static inline float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
290 {
291 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
292 #ifdef __zcc__
293  f32_t res;
294  tpt_mean_f32(&res, src, size);
295  return res;
296 #else
297  return riscv_dsp_mean_f32(src, size);
298 #endif
299 #endif
300 }
301 
313 static inline q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
314 {
315 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
316 #ifdef __zcc__
317  q15_t res;
318  tpt_mean_q15(&res, src, size);
319  return res;
320 #else
321  return riscv_dsp_mean_q15(src, size);
322 #endif
323 #endif
324 }
325 
337 static inline q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
338 {
339 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
340 #ifdef __zcc__
341  q31_t res;
342  tpt_mean_q31(&res, src, size);
343  return res;
344 #else
345  return riscv_dsp_mean_q31(src, size);
346 #endif
347 #endif
348 }
349 
361 static inline q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
362 {
363 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
364 #ifdef __zcc__
365  q7_t res;
366  tpt_mean_q7(&res, src, size);
367  return res;
368 #else
369  return riscv_dsp_mean_q7(src, size);
370 #endif
371 #endif
372 }
373 
/**
 * @brief Mean of a uint8_t buffer (DSP backend wrapper).
 *
 * Always forwards to riscv_dsp_mean_u8(); there is no __zcc__ variant.
 */
static inline uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
{
#if HPM_DSP_CORE == HPM_DSP_HW_NDS32
    return riscv_dsp_mean_u8(src, size);
#endif
}
389 
390 // Sum of the Squares
397 static inline float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
398 {
399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
400 #ifdef __zcc__
401  f32_t res;
402  tpt_power_f32(&res, src, size);
403  return res;
404 #else
405  return riscv_dsp_pwr_f32(src, size);
406 #endif
407 #endif
408 }
409 
422 static inline q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
423 {
424 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
425 #ifdef __zcc__
426  q63_t res;
427  tpt_power_q15(&res, src, size);
428  return res;
429 #else
430  return riscv_dsp_pwr_q15(src, size);
431 #endif
432 #endif
433 }
434 
448 static inline q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
449 {
450 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
451 #ifdef __zcc__
452  q63_t res;
453  tpt_power_q31(&res, src, size);
454  return res;
455 #else
456  return riscv_dsp_pwr_q31(src, size);
457 #endif
458 #endif
459 }
460 
473 static inline q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
474 {
475 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
476 #ifdef __zcc__
477  q31_t res;
478  tpt_power_q7(&res, src, size);
479  return res;
480 #else
481  return riscv_dsp_pwr_q7(src, size);
482 #endif
483 #endif
484 }
485 
486 // Root Mean Square
493 static inline float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
494 {
495 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
496 #ifdef __zcc__
497  f32_t res;
498  tpt_rms_f32(&res, src, size);
499  return res;
500 #else
501  return riscv_dsp_rms_f32(src, size);
502 #endif
503 #endif
504 }
505 
519 static inline q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
520 {
521 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
522 #ifdef __zcc__
523  q15_t res;
524  tpt_rms_q15(&res, src, size);
525  return res;
526 #else
527  return riscv_dsp_rms_q15(src, size);
528 #endif
529 #endif
530 }
531 
545 static inline q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
546 {
547 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
548 #ifdef __zcc__
549  q31_t res;
550  tpt_rms_q31(&res, src, size);
551  return res;
552 #else
553  return riscv_dsp_rms_q31(src, size);
554 #endif
555 #endif
556 }
557 
558 // Standard deviation
565 static inline float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
566 {
567 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
568 #ifdef __zcc__
569  f32_t res;
570  tpt_std_f32(&res, src, size);
571  return res;
572 #else
573  return riscv_dsp_std_f32(src, size);
574 #endif
575 #endif
576 }
577 
591 static inline q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
592 {
593 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
594 #ifdef __zcc__
595  q15_t res;
596  tpt_std_q15(&res, src, size);
597  return res;
598 #else
599  return riscv_dsp_std_q15(src, size);
600 #endif
601 #endif
602 }
603 
617 static inline q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
618 {
619 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
620 #ifdef __zcc__
621  q31_t res;
622  tpt_std_q31(&res, src, size);
623  return res;
624 #else
625  return riscv_dsp_std_q31(src, size);
626 #endif
627 #endif
628 }
629 
642 static inline q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
643 {
644 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
645  return riscv_dsp_std_u8(src, size);
646 #endif
647 }
648 
649 // Variance
656 static inline float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
657 {
658 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
659 #ifdef __zcc__
660  f32_t res;
661  tpt_var_f32(&res, src, size);
662  return res;
663 #else
664  return riscv_dsp_var_f32(src, size);
665 #endif
666 #endif
667 }
668 
682 static inline q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
683 {
684 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
685 #ifdef __zcc__
686  q15_t res;
687  tpt_var_q15(&res, src, size);
688  return res;
689 #else
690  return riscv_dsp_var_q15(src, size);
691 #endif
692 #endif
693 }
694 
708 static inline q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
709 {
710 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
711 #ifdef __zcc__
712  q31_t res;
713  tpt_var_q31(&res, src, size);
714  return res;
715 #else
716  return riscv_dsp_var_q31(src, size);
717 #endif
718 #endif
719 }
720 
729 static inline float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
730 {
731 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
732 #ifdef __zcc__
733  return tpt_entropy_f32(src, size);
734 #else
735  return riscv_dsp_entropy_f32(src, size);
736 #endif
737 #endif
738 }
739 
740 
752 static inline float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
753 {
754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
755 #ifdef __zcc__
756  return tpt_relative_entropy_f32(src1, src2, size);
757 #else
758  return riscv_dsp_relative_entropy_f32(src1, src2, size);
759 #endif
760 #endif
761 }
762 
770 static inline float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
771 {
772 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
773 #ifdef __zcc__
774  tpt_lse_f32(src, size);
775 #else
776  return riscv_dsp_lse_f32(src, size);
777 #endif
778 #endif
779 }
780 
790 static inline float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
791 {
792 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
793 #ifdef __zcc__
794  return tpt_lse_dprod_f32(src1, src2, size, buffer);
795 #else
796  return riscv_dsp_lse_dprod_f32(src1, src2, size, buffer);
797 #endif
798 #endif
799 }
800 
810 static inline uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t * src, float32_t *buf)
811 {
812 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
813  return riscv_dsp_gaussian_naive_bayes_est_f32(instance, src, buf);
814 #endif
815 }
816 
824 static inline float32_t hpm_dsp_absmax_f32(const float32_t* src, uint32_t size, uint32_t* index)
825 {
826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
827  return riscv_dsp_absmax_f32(src, size, index);
828 #endif
829 }
830 
838 static inline q15_t hpm_dsp_absmax_q15(const q15_t* src, uint32_t size, uint32_t* index)
839 {
840 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
841  return riscv_dsp_absmax_q15(src, size, index);
842 #endif
843 }
844 
852 static inline q31_t hpm_dsp_absmax_q31(const q31_t* src, uint32_t size, uint32_t* index)
853 {
854 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
855  return riscv_dsp_absmax_q31(src, size, index);
856 #endif
857 }
858 
866 static inline q7_t hpm_dsp_absmax_q7(const q7_t* src, uint32_t size, uint32_t* index)
867 {
868 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
869  return riscv_dsp_absmax_q7(src, size, index);
870 #endif
871 }
872 
880 static inline float32_t hpm_dsp_absmin_f32(const float32_t* src, uint32_t size, uint32_t* index)
881 {
882 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
883  return riscv_dsp_absmin_f32(src, size, index);
884 #endif
885 }
886 
894 static inline q31_t hpm_dsp_absmin_q31(const q31_t* src, uint32_t size, uint32_t* index)
895 {
896 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
897  return riscv_dsp_absmin_q31(src, size, index);
898 #endif
899 }
900 
908 static inline q15_t hpm_dsp_absmin_q15(const q15_t* src, uint32_t size, uint32_t* index)
909 {
910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
911  return riscv_dsp_absmin_q15(src, size, index);
912 #endif
913 }
914 
922 static inline q7_t hpm_dsp_absmin_q7(const q7_t* src, uint32_t size, uint32_t* index)
923 {
924 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
925  return riscv_dsp_absmin_q7(src, size, index);
926 #endif
927 }
928 
929 #endif
930 #endif
931 
937 #ifdef HPM_MATH_DSP_BASIC
938 
944 #ifdef HPM_EN_MATH_DSP_LIB
945 
946 #ifdef __zcc__
947 #include "tpt_math.h"
948 #endif
949 
950 #include "riscv_dsp_basic_math.h"
951 
952 // Absolute value
959 static inline void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
960 {
961 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
962 #ifdef __zcc__
963  tpt_abs_f32(dst, src, size);
964 #else
965  riscv_dsp_abs_f32(src, dst, size);
966 #endif
967 #endif
968 }
969 
979 static inline void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
980 {
981 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
982 #ifdef __zcc__
983  tpt_abs_q31(dst, src, size);
984 #else
985  riscv_dsp_abs_q31(src, dst, size);
986 #endif
987 
988 #endif
989 }
990 
1000 static inline void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
1001 {
1002 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1003 #ifdef __zcc__
1004  tpt_abs_q15(dst, src, size);
1005 #else
1006  riscv_dsp_abs_q15(src, dst, size);
1007 #endif
1008 #endif
1009 }
1010 
1020 static inline void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
1021 {
1022 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1023 #ifdef __zcc__
1024  tpt_abs_q7(dst, src, size);
1025 #else
1026  riscv_dsp_abs_q7(src, dst, size);
1027 #endif
1028 #endif
1029 }
1030 
1031 // Addition
1039 static inline void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1040 {
1041 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1042 #ifdef __zcc__
1043  tpt_add_f32(dst, src1, src2, size);
1044 #else
1045  riscv_dsp_add_f32(src1, src2, dst, size);
1046 #endif
1047 #endif
1048 }
1049 
1059 static inline void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
1060 {
1061 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1062 #ifdef __zcc__
1063  tpt_add_q31(dst, src1, src2, size);
1064 #else
1065  riscv_dsp_add_q31(src1, src2, dst, size);
1066 #endif
1067 #endif
1068 }
1069 
1079 static inline void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
1080 {
1081 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1082 #ifdef __zcc__
1083  tpt_add_q15(dst, src1, src2, size);
1084 #else
1085  riscv_dsp_add_q15(src1, src2, dst, size);
1086 #endif
1087 #endif
1088 }
1089 
1099 static inline void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
1100 {
1101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1102 #ifdef __zcc__
1103  tpt_add_q7(dst, src1, src2, size);
1104 #else
1105  riscv_dsp_add_q7(src1, src2, dst, size);
1106 #endif
1107 #endif
1108 }
1109 
/**
 * @brief Addition of two uint8_t buffers into a uint16_t @p dst buffer.
 *
 * Backend: tpt_add_u8_u16(dst, src1, src2, size) under __zcc__, otherwise
 * riscv_dsp_add_u8_u16(src1, src2, dst, size).
 */
static inline void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32) && defined(__zcc__)
    tpt_add_u8_u16(dst, src1, src2, size);
#elif HPM_DSP_CORE == HPM_DSP_HW_NDS32
    riscv_dsp_add_u8_u16(src1, src2, dst, size);
#endif
}
1129 
1130 // Subtraction
1138 static inline void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1139 {
1140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1141 #ifdef __zcc__
1142  tpt_sub_f32(dst, src1, src2, size);
1143 #else
1144  riscv_dsp_sub_f32(src1, src2, dst, size);
1145 #endif
1146 #endif
1147 }
1148 
1158 static inline void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
1159 {
1160 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1161 #ifdef __zcc__
1162  tpt_sub_q31(dst, src1, src2, size);
1163 #else
1164  riscv_dsp_sub_q31(src1, src2, dst, size);
1165 #endif
1166 #endif
1167 }
1168 
1178 static inline void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
1179 {
1180 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1181 #ifdef __zcc__
1182  tpt_sub_q15(dst, src1, src2, size);
1183 #else
1184  riscv_dsp_sub_q15(src1, src2, dst, size);
1185 #endif
1186 #endif
1187 }
1188 
1198 static inline void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
1199 {
1200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1201 #ifdef __zcc__
1202  tpt_sub_q7(dst, src1, src2, size);
1203 #else
1204  riscv_dsp_sub_q7(src1, src2, dst, size);
1205 #endif
1206 #endif
1207 }
1208 
1218 static inline void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
1219 {
1220 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1221  riscv_dsp_sub_u8_q7(src1, src2, dst, size);
1222 #endif
1223 }
1224 
1225 // Multiplication
1233 static inline void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1234 {
1235 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1236 #ifdef __zcc__
1237  tpt_mult_f32(dst, src1, src2, size);
1238 #else
1239  riscv_dsp_mul_f32(src1, src2, dst, size);
1240 #endif
1241 #endif
1242 }
1243 
1253 static inline void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
1254 {
1255 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1256 #ifdef __zcc__
1257  tpt_mult_q31(dst, src1, src2, size);
1258 #else
1259  riscv_dsp_mul_q31(src1, src2, dst, size);
1260 #endif
1261 #endif
1262 }
1263 
1273 static inline void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
1274 {
1275 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1276 #ifdef __zcc__
1277  tpt_mult_q15(dst, src1, src2, size);
1278 #else
1279  riscv_dsp_mul_q15(src1, src2, dst, size);
1280 #endif
1281 #endif
1282 }
1283 
1293 static inline void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
1294 {
1295 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1296 #ifdef __zcc__
1297  tpt_mult_q7(dst, src1, src2, size);
1298 #else
1299  riscv_dsp_mul_q7(src1, src2, dst, size);
1300 #endif
1301 #endif
1302 }
1303 
/**
 * @brief Multiplication of two uint8_t buffers into a uint16_t @p dst buffer.
 *
 * Always forwards to riscv_dsp_mul_u8_u16(); there is no __zcc__ variant.
 */
static inline void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
{
#if HPM_DSP_CORE == HPM_DSP_HW_NDS32
    riscv_dsp_mul_u8_u16(src1, src2, dst, size);
#endif
}
1319 
1320 // Division
1328 static inline void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1329 {
1330 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1331 #ifdef __zcc__
1332  tpt_div_f32(dst, src1, src2, size);
1333 #else
1334  riscv_dsp_div_f32(src1, src2, dst, size);
1335 #endif
1336 #endif
1337 }
1338 
1345 static inline q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
1346 {
1347 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1348 #ifdef __zcc__
1349  return tpt_div_q31(src1, src2);
1350 #else
1351  return riscv_dsp_div_q31(src1, src2);
1352 #endif
1353 #endif
1354 }
1355 
1362 static inline q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
1363 {
1364 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1365 #ifdef __zcc__
1366  return tpt_div_s64_u32(src1, src2);
1367 #else
1368  return riscv_dsp_div_s64_u32(src1, src2);
1369 #endif
1370 #endif
1371 }
1372 
1379 static inline q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
1380 {
1381 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1382 #ifdef __zcc__
1383  return tpt_div_u64_u32(src1, src2);
1384 #else
1385  return riscv_dsp_div_u64_u32(src1, src2);
1386 #endif
1387 #endif
1388 }
1389 
1390 // Negation
1397 static inline void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
1398 {
1399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1400 #ifdef __zcc__
1401  tpt_negate_f32(dst, src, size);
1402 #else
1403  riscv_dsp_neg_f32(src, dst, size);
1404 #endif
1405 #endif
1406 }
1407 
1417 static inline void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
1418 {
1419 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1420 #ifdef __zcc__
1421  tpt_negate_q31(dst, src, size);
1422 #else
1423  riscv_dsp_neg_q31(src, dst, size);
1424 #endif
1425 #endif
1426 }
1427 
1437 static inline void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
1438 {
1439 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1440 #ifdef __zcc__
1441  tpt_negate_q15(dst, src, size);
1442 #else
1443  riscv_dsp_neg_q15(src, dst, size);
1444 #endif
1445 #endif
1446 }
1447 
1457 static inline void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
1458 {
1459 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1460 #ifdef __zcc__
1461  tpt_negate_q7(dst, src, size);
1462 #else
1463  riscv_dsp_neg_q7(src, dst, size);
1464 #endif
1465 #endif
1466 }
1467 
1468 // Dot Product
1476 static inline float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
1477 {
1478 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1479 #ifdef __zcc__
1480  f32_t res;
1481  tpt_dot_prod_f32(&res, src1, src2, size);
1482  return res;
1483 #else
1484  return riscv_dsp_dprod_f32(src1, src2, size);
1485 #endif
1486 #endif
1487 }
1488 
1501 static inline q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
1502 {
1503 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1504 #ifdef __zcc__
1505  q63_t res;
1506  tpt_dot_prod_q31(&res, src1, src2, size);
1507  return res;
1508 #else
1509  return riscv_dsp_dprod_q31(src1, src2, size);
1510 #endif
1511 #endif
1512 }
1513 
1524 static inline q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
1525 {
1526 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1527 #ifdef __zcc__
1528  q63_t res;
1529  tpt_dot_prod_q15(&res, src1, src2, size);
1530  return res;
1531 #else
1532  return riscv_dsp_dprod_q15(src1, src2, size);
1533 #endif
1534 #endif
1535 }
1536 
1548 static inline q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
1549 {
1550 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1551  return riscv_dsp_dprod_u8xq15(src1, src2, size);
1552 #endif
1553 }
1554 
1555 
1566 static inline q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
1567 {
1568 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1569 #ifdef __zcc__
1570  q31_t res;
1571  tpt_dot_prod_q7(&res, src1, src2, size);
1572  return res;
1573 #else
1574  return riscv_dsp_dprod_q7(src1, src2, size);
1575 #endif
1576 #endif
1577 }
1578 
1589 static inline q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
1590 {
1591 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1592  return riscv_dsp_dprod_q7xq15(src1, src2, size);
1593 #endif
1594 }
1595 
/**
 * @brief Dot product of two uint8_t buffers, returned as uint32_t.
 *
 * Always forwards to riscv_dsp_dprod_u8(); there is no __zcc__ variant.
 */
static inline uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
{
#if HPM_DSP_CORE == HPM_DSP_HW_NDS32
    return riscv_dsp_dprod_u8(src1, src2, size);
#endif
}
1612 
1613 // Offset
1621 static inline void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
1622 {
1623 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1624 #ifdef __zcc__
1625  tpt_offset_f32(dst, src, offset, size);
1626 #else
1627  riscv_dsp_offset_f32(src, offset, dst, size);
1628 #endif
1629 #endif
1630 }
1631 
1641 static inline void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
1642 {
1643 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1644 #ifdef __zcc__
1645  tpt_offset_q31(dst, src, offset, size);
1646 #else
1647  riscv_dsp_offset_q31(src, offset, dst, size);
1648 #endif
1649 #endif
1650 }
1651 
1661 static inline void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
1662 {
1663 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1664 #ifdef __zcc__
1665  tpt_offset_q15(dst, src, offset, size);
1666 #else
1667  riscv_dsp_offset_q15(src, offset, dst, size);
1668 #endif
1669 #endif
1670 }
1671 
1681 static inline void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
1682 {
1683 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1684 #ifdef __zcc__
1685  tpt_offset_q7(dst, src, offset, size);
1686 #else
1687  riscv_dsp_offset_q7(src, offset, dst, size);
1688 #endif
1689 #endif
1690 }
1691 
1701 static inline void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
1702 {
1703 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1704  riscv_dsp_offset_u8(src, offset, dst, size);
1705 #endif
1706 }
1707 
1708 // Scale
1716 static inline void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
1717 {
1718 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1719 #ifdef __zcc__
1720  tpt_scale_f32(dst, src, scale, size);
1721 #else
1722  riscv_dsp_scale_f32(src, scale, dst, size);
1723 #endif
1724 #endif
1725 }
1726 
1739 static inline void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
1740 {
1741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1742 #ifdef __zcc__
1743  tpt_scale_q31(dst, src, scalefract, shift, size);
1744 #else
1745  riscv_dsp_scale_q31(src, scalefract, shift, dst, size);
1746 #endif
1747 #endif
1748 }
1749 
1762 static inline void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
1763 {
1764 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1765 #ifdef __zcc__
1766  tpt_scale_q15(dst, src, scalefract, shift, size);
1767 #else
1768  riscv_dsp_scale_q15(src, scalefract, shift, dst, size);
1769 #endif
1770 #endif
1771 }
1772 
1785 static inline void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
1786 {
1787 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1788 #ifdef __zcc__
1789  tpt_scale_q7(dst, src, scalefract, shift, size);
1790 #else
1791  riscv_dsp_scale_q7(src, scalefract, shift, dst, size);
1792 #endif
1793 #endif
1794 }
1795 
1807 static inline void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
1808 {
1809 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1810  riscv_dsp_scale_u8(src, scalefract, shift, dst, size);
1811 #endif
1812 }
1813 
1814 // Shift
1825 static inline void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
1826 {
1827 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1828 #ifdef __zcc__
1829  tpt_shift_q15(dst, src, shift, size);
1830 #else
1831  riscv_dsp_shift_q15(src, shift, dst, size);
1832 #endif
1833 #endif
1834 }
1835 
1846 static inline void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
1847 {
1848 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1849 #ifdef __zcc__
1850  tpt_shift_q31(dst, src, shift, size);
1851 #else
1852  riscv_dsp_shift_q31(src, shift, dst, size);
1853 #endif
1854 #endif
1855 }
1856 
1867 static inline void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
1868 {
1869 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1870 #ifdef __zcc__
1871  tpt_shift_q7(dst, src, shift, size);
1872 #else
1873  riscv_dsp_shift_q7(src, shift, dst, size);
1874 #endif
1875 #endif
1876 }
1877 
/**
 * @brief Shift operation on a uint8_t buffer (DSP backend wrapper).
 *
 * Always forwards to riscv_dsp_shift_u8(); there is no __zcc__ variant.
 */
static inline void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
{
#if HPM_DSP_CORE == HPM_DSP_HW_NDS32
    riscv_dsp_shift_u8(src, shift, dst, size);
#endif
}
1894 
1908 static inline void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
1909 {
1910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1911 #ifdef __zcc__
1912  tpt_clip_f32(dst, src, low, high, size);
1913 #else
1914  riscv_dsp_clip_f32(src, dst, low, high, size);
1915 #endif
1916 #endif
1917 }
1927 static inline void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
1928 {
1929 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1930 #ifdef __zcc__
1931  tpt_clip_q31(dst, src, low, high, size);
1932 #else
1933  riscv_dsp_clip_q31(src, dst, low, high, size);
1934 #endif
1935 #endif
1936 }
1946 static inline void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
1947 {
1948 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1949 #ifdef __zcc__
1950  tpt_clip_q15(dst, src, low, high, size);
1951 #else
1952  riscv_dsp_clip_q15(src, dst, low, high, size);
1953 #endif
1954 #endif
1955 }
1965 static inline void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
1966 {
1967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1968 #ifdef __zcc__
1969  tpt_clip_q7(dst, src, low, high, size);
1970 #else
1971  riscv_dsp_clip_q7(src, dst, low, high, size);
1972 #endif
1973 #endif
1974 }
1977 // AND
1998 static inline void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
1999 {
2000 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2001 #ifdef __zcc__
2002  tpt_and_32bit(dst, src1, src2, size);
2003 #else
2004  riscv_dsp_and_u32(src1, src2, dst, size);
2005 #endif
2006 #endif
2007 }
2008 
2017 static inline void hpm_dsp_and_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
2018 {
2019 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2020 #ifdef __zcc__
2021  tpt_and_16bit(dst, src1, src2, size);
2022 #else
2023  riscv_dsp_and_u16(src1, src2, dst, size);
2024 #endif
2025 #endif
2026 }
2027 
2036 static inline void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
2037 {
2038 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2039 #ifdef __zcc__
2040  tpt_and_8bit(dst, src1, src2, size);
2041 #else
2042  riscv_dsp_and_u8(src1, src2, dst, size);
2043 #endif
2044 #endif
2045 }
2048 // OR
2069 static inline void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
2070 {
2071 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2072 #ifdef __zcc__
2073  tpt_or_32bit(dst, src1, src2, size);
2074 #else
2075  riscv_dsp_or_u32(src1, src2, dst, size);
2076 #endif
2077 #endif
2078 }
2087 static inline void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
2088 {
2089 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2090 #ifdef __zcc__
2091  tpt_or_16bit(dst, src1, src2, size);
2092 #else
2093  riscv_dsp_or_u16(src1, src2, dst, size);
2094 #endif
2095 #endif
2096 }
2105 static inline void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
2106 {
2107 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2108 #ifdef __zcc__
2109  tpt_or_8bit(dst, src1, src2, size);
2110 #else
2111  riscv_dsp_or_u8(src1, src2, dst, size);
2112 #endif
2113 #endif
2114 }
2117 // XOR
2138 static inline void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
2139 {
2140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2141 #ifdef __zcc__
2142  tpt_xor_32bit(dst, src1, src2, size);
2143 #else
2144  riscv_dsp_xor_u32(src1, src2, dst, size);
2145 #endif
2146 #endif
2147 }
2156 static inline void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
2157 {
2158 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2159 #ifdef __zcc__
2160  tpt_xor_16bit(dst, src1, src2, size);
2161 #else
2162  riscv_dsp_xor_u16(src1, src2, dst, size);
2163 #endif
2164 #endif
2165 }
2174 static inline void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
2175 {
2176 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2177 #ifdef __zcc__
2178  tpt_xor_8bit(dst, src1, src2, size);
2179 #else
2180  riscv_dsp_xor_u8(src1, src2, dst, size);
2181 #endif
2182 #endif
2183 }
2186 // Not
2206 static inline void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
2207 {
2208 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2209 #ifdef __zcc__
2210  tpt_not_32bit(dst, src, size);
2211 #else
2212  riscv_dsp_not_u32(src, dst, size);
2213 #endif
2214 #endif
2215 }
2223 static inline void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
2224 {
2225 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2226 #ifdef __zcc__
2227  tpt_not_16bit(dst, src, size);
2228 #else
2229  riscv_dsp_not_u16(src, dst, size);
2230 #endif
2231 #endif
2232 }
2240 static inline void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
2241 {
2242 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2243 #ifdef __zcc__
2244  tpt_not_8bit(dst, src, size);
2245 #else
2246  riscv_dsp_not_u8(src, dst, size);
2247 #endif
2248 #endif
2249 }
2254 #endif
2255 
2256 #include <stdint.h>
2257 
2265 
2273 
2281 
2289 
2290 #endif
2291 
2292 #ifdef HPM_MATH_DSP_COMPLEX
2293 
2303 #ifdef HPM_EN_MATH_DSP_LIB
2304 
2305 #ifdef __zcc__
2306 #include "tpt_math.h"
2307 #endif
2308 
2309 #include "riscv_dsp_complex_math.h"
2310 
2311 // Complex Conjugate
2318 static inline void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
2319 {
2320 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2321 #ifdef __zcc__
2322  tpt_cmplx_conj_f32(dst, src, size);
2323 #else
2324  riscv_dsp_cconj_f32(src, dst, size);
2325 #endif
2326 #endif
2327 }
2328 
2338 static inline void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
2339 {
2340 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2341 #ifdef __zcc__
2342  tpt_cmplx_conj_q15(dst, src, size);
2343 #else
2344  riscv_dsp_cconj_q15(src, dst, size);
2345 #endif
2346 #endif
2347 }
2348 
2358 static inline void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
2359 {
2360 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2361 #ifdef __zcc__
2362  tpt_cmplx_conj_q31(dst, src, size);
2363 #else
2364  riscv_dsp_cconj_q31(src, dst, size);
2365 #endif
2366 #endif
2367 }
2368 
2369 // Complex Dot Product
2377 static inline void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
2378 {
2379 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2380  riscv_dsp_cdprod_f32(src1, src2, size, dst);
2381 #endif
2382 }
2383 
2392 static inline void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
2393 {
2394 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2395 #ifdef __zcc__
2396  tpt_cmplx_dot_prod_f32(rout, iout, src1, src2, size);
2397 #else
2398  riscv_dsp_cdprod_typ2_f32(src1, src2, size, rout, iout);
2399 #endif
2400 #endif
2401 }
2402 
2413 static inline void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
2414 {
2415 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2416  riscv_dsp_cdprod_q15(src1, src2, size, dst);
2417 #endif
2418 }
2419 
2431 static inline void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
2432 {
2433 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2434  riscv_dsp_cdprod_typ2_q15(src1, src2, size, rout, iout);
2435 #endif
2436 }
2437 
2448 static inline void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
2449 {
2450 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2451  riscv_dsp_cdprod_q31(src1, src2, size, dst);
2452 #endif
2453 }
2454 
2455 
2467 static inline void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
2468 {
2469 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2470 #ifdef __zcc__
2471  tpt_cmplx_dot_prod_q31(rout, iout, src1, src2, size);
2472 #else
2473  riscv_dsp_cdprod_typ2_q31(src1, src2, size, rout, iout);
2474 #endif
2475 #endif
2476 }
2477 
2478 // Complex Magnitude
2485 static inline void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
2486 {
2487 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2488 #ifdef __zcc__
2489  tpt_cmplx_mag_f32(dst, src, size);
2490 #else
2491  riscv_dsp_cmag_f32(src, dst, size);
2492 #endif
2493 #endif
2494 }
2495 
2505 static inline void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
2506 {
2507 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2508 #ifdef __zcc__
2509  tpt_cmplx_mag_q15(dst, src, size);
2510 #else
2511  riscv_dsp_cmag_q15(src, dst, size);
2512 #endif
2513 #endif
2514 }
2515 
2525 static inline void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
2526 {
2527 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2528 #ifdef __zcc__
2529  tpt_cmplx_mag_q31(dst, src, size);
2530 #else
2531  riscv_dsp_cmag_q31(src, dst, size);
2532 #endif
2533 #endif
2534 }
2535 
2536 // Complex Magnitude Squared
2544 static inline void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
2545 {
2546 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2547 #ifdef __zcc__
2548  tpt_cmplx_mag_squared_f32(dst, src, size);
2549 #else
2550  riscv_dsp_cmag_sqr_f32(src, dst, size);
2551 #endif
2552 #endif
2553 }
2554 
2564 static inline void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
2565 {
2566 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2567 #ifdef __zcc__
2568  tpt_cmplx_mag_squared_q15(dst, src, size);
2569 #else
2570  riscv_dsp_cmag_sqr_q15(src, dst, size);
2571 #endif
2572 #endif
2573 }
2574 
2584 static inline void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
2585 {
2586 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2587 #ifdef __zcc__
2588  tpt_cmplx_mag_squared_q31(dst, src, size);
2589 #else
2590  riscv_dsp_cmag_sqr_q31(src, dst, size);
2591 #endif
2592 #endif
2593 }
2594 
2595 // Complex Multiplication
2603 static inline void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
2604 {
2605 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2606 #ifdef __zcc__
2607  tpt_cmplx_mult_cmplx_f32(dst, src1, src2, size);
2608 #else
2609  riscv_dsp_cmul_f32(src1, src2, dst, size);
2610 #endif
2611 #endif
2612 }
2613 
2624 static inline void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
2625 {
2626 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2627 #ifdef __zcc__
2628  tpt_cmplx_mult_cmplx_q15(dst, src1, src2, size);
2629 #else
2630  riscv_dsp_cmul_q15(src1, src2, dst, size);
2631 #endif
2632 #endif
2633 }
2634 
2645 static inline void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
2646 {
2647 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2648 #ifdef __zcc__
2649  tpt_cmplx_mult_cmplx_q31(dst, src1, src2, size);
2650 #else
2651  riscv_dsp_cmul_q31(src1, src2, dst, size);
2652 #endif
2653 #endif
2654 }
2655 
2656 // Complex-by-Real Multiplication
2664 static inline void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
2665 {
2666 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2667 #ifdef __zcc__
2668  tpt_cmplx_mult_real_f32(dst, src, real, size);
2669 #else
2670  riscv_dsp_cmul_real_f32(src, real, dst, size);
2671 #endif
2672 #endif
2673 }
2674 
2684 static inline void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
2685 {
2686 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2687 #ifdef __zcc__
2688  tpt_cmplx_mult_real_q15(dst, src, real, size);
2689 #else
2690  riscv_dsp_cmul_real_q15(src, real, dst, size);
2691 #endif
2692 #endif
2693 }
2694 
2704 static inline void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
2705 {
2706 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2707 #ifdef __zcc__
2708  tpt_cmplx_mult_real_q31(dst, src, real, size);
2709 #else
2710  riscv_dsp_cmul_real_q31(src, real, dst, size);
2711 #endif
2712 #endif
2713 }
2714 #endif
2715 #endif
2716 
2722 #ifdef HPM_MATH_DSP_CONTROLLER
2723 
2729 #ifdef HPM_EN_MATH_DSP_LIB
2730 
2731 #include "riscv_dsp_controller_math.h"
2732 
2733 // Clarke Transform
2741 static inline void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
2742 {
2743 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2744  riscv_dsp_clarke_f32(a, b, alpha, beta);
2745 #endif
2746 }
2757 static inline void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
2758 {
2759 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2760  riscv_dsp_clarke_q31(a, b, alpha, beta);
2761 #endif
2762 }
2763 
2764 // Inverse Clarke Transform
2772 static inline void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
2773 {
2774 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2775  riscv_dsp_inv_clarke_f32(alpha, beta, a, b);
2776 #endif
2777 }
2788 static inline void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
2789 {
2790 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2791  riscv_dsp_inv_clarke_q31(alpha, beta, a, b);
2792 #endif
2793 }
2794 
2795 // Park Transform
2805 static inline void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
2806 {
2807 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2808  riscv_dsp_park_f32(alpha, beta, a, b, sin, cos);
2809 #endif
2810 }
2811 
2824 static inline void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
2825 {
2826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2827  riscv_dsp_park_q31(alpha, beta, a, b, sin, cos);
2828 #endif
2829 }
2830 
2831 // Inverse Park Transform
2841 static inline void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
2842 {
2843 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2844  riscv_dsp_inv_park_f32(a, b, alpha, beta, sin, cos);
2845 #endif
2846 }
2859 static inline void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
2860 {
2861 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2862  riscv_dsp_inv_park_q31(a, b, alpha, beta, sin, cos);
2863 #endif
2864 }
2872 static inline float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
2873 {
2874 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2875  return riscv_dsp_pid_f32(instance, src);
2876 #endif
2877 }
2890 static inline void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
2891 {
2892 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2893  riscv_dsp_init_pid_f32(instance, set);
2894 #endif
2895 }
2896 
2904 static inline q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
2905 {
2906 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2907  return riscv_dsp_pid_q31(instance, src);
2908 #endif
2909 }
2910 
2923 static inline void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
2924 {
2925 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2926  riscv_dsp_init_pid_q31(instance, set);
2927 #endif
2928 }
2929 
2930 static inline q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
2931 {
2932 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2933  return riscv_dsp_pid_q15(instance, src);
2934 #endif
2935 }
2948 static inline void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
2949 {
2950 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2951  riscv_dsp_init_pid_q15(instance, set);
2952 #endif
2953 }
2954 #endif
2955 #endif
2956 
2962 #ifdef HPM_MATH_DSP_DISTANCE
2963 
2969 #ifdef HPM_EN_MATH_DSP_LIB
2970 #ifdef __zcc__
2971 #include "tpt_math.h"
2972 #endif
2973 #include "riscv_dsp_distance_math.h"
2974 
2975 
2983 static inline float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
2984 {
2985 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2986 #ifdef __zcc__
2987  return tpt_braycurtis_distance_f32(src1, src2, size);
2988 #else
2989  return riscv_dsp_dist_bray_curtis_f32(src1, src2, size);
2990 #endif
2991 #endif
2992 }
2993 
3001 static inline float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3002 {
3003 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3004 #ifdef __zcc__
3005  return tpt_canberra_distance_f32(src1, src2, size);
3006 #else
3007  return riscv_dsp_dist_canberra_f32(src1, src2, size);
3008 #endif
3009 #endif
3010 }
3011 
3019 static inline float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3020 {
3021 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3022 #ifdef __zcc__
3023  return tpt_chebyshev_distance_f32(src1, src2, size);
3024 #else
3025  return riscv_dsp_dist_chebyshev_f32(src1, src2, size);
3026 #endif
3027 #endif
3028 }
3029 
3037 static inline float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3038 {
3039 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3040 #ifdef __zcc__
3041  return tpt_cityblock_distance_f32(src1, src2, size);
3042 #else
3043  return riscv_dsp_dist_city_block_f32(src1, src2, size);
3044 #endif
3045 #endif
3046 }
3047 
3055 static inline float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3056 {
3057 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3058 #ifdef __zcc__
3059  return tpt_correlation_distance_f32(src1, src2, size);
3060 #else
3061  return riscv_dsp_dist_corr_f32(src1, src2, size);
3062 #endif
3063 #endif
3064 }
3065 
3073 static inline float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3074 {
3075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3076 #ifdef __zcc__
3077  return tpt_cosine_distance_f32(src1, src2, size);
3078 #else
3079  return riscv_dsp_dist_cos_f32(src1, src2, size);
3080 #endif
3081 #endif
3082 }
3083 
3091 static inline float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3092 {
3093 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3094 #ifdef __zcc__
3095  return tpt_euclidean_distance_f32(src1, src2, size);
3096 #else
3097  return riscv_dsp_dist_euclidean_f32(src1, src2, size);
3098 #endif
3099 #endif
3100 }
3101 
3109 static inline float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3110 {
3111 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3112 #ifdef __zcc__
3113  return tpt_jensenshannon_distance_f32(src1, src2, size);
3114 #else
3115  return riscv_dsp_dist_jensen_shannon_f32(src1, src2, size);
3116 #endif
3117 #endif
3118 }
3119 
3128 static inline float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
3129 {
3130 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3131 #ifdef __zcc__
3132  return tpt_minkowski_distance_f32(src1, src2, order, size);
3133 #else
3134  return riscv_dsp_dist_minkowski_f32(src1, src2, order, size);
3135 #endif
3136 #endif
3137 }
3138 
3146 static inline float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3147 {
3148 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3149 #ifdef __zcc__
3150  return tpt_dice_distance(src1, src2, numofbool);
3151 #else
3152  return riscv_dsp_bdist_dice_u32_f32(src1, src2, numofbool);
3153 #endif
3154 #endif
3155 }
3156 
3164 static inline float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3165 {
3166 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3167 #ifdef __zcc__
3168  return tpt_hamming_distance(src1, src2, numofbool);
3169 #else
3170  return riscv_dsp_bdist_hamming_u32_f32(src1, src2, numofbool);
3171 #endif
3172 #endif
3173 }
3174 
3182 static inline float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3183 {
3184 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3185 #ifdef __zcc__
3186  return tpt_jaccard_distance(src1, src2, numofbool);
3187 #else
3188  return riscv_dsp_bdist_jaccard_u32_f32(src1, src2, numofbool);
3189 #endif
3190 #endif
3191 }
3192 
3200 static inline float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3201 {
3202 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3203 #ifdef __zcc__
3204  return tpt_kulsinski_distance(src1, src2, numofbool);
3205 #else
3206  return riscv_dsp_bdist_kulsinski_u32_f32(src1, src2, numofbool);
3207 #endif
3208 #endif
3209 }
3210 
3218 static inline float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3219 {
3220 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3221 #ifdef __zcc__
3222  return tpt_sokalmichener_distance(src1, src2, numofbool);
3223 #else
3224  return riscv_dsp_bdist_sokal_michener_u32_f32(src1, src2, numofbool);
3225 #endif
3226 #endif
3227 }
3228 
3236 static inline float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3237 {
3238 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3239 #ifdef __zcc__
3240  return tpt_sokalsneath_distance(src1, src2, numofbool);
3241 #else
3242  return riscv_dsp_bdist_sokal_sneath_u32_f32(src1, src2, numofbool);
3243 #endif
3244 #endif
3245 }
3246 
3254 static inline float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3255 {
3256 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3257 #ifdef __zcc__
3258  return tpt_rogerstanimoto_distance(src1, src2, numofbool);
3259 #else
3260  return riscv_dsp_bdist_rogers_tanimoto_u32_f32(src1, src2, numofbool);
3261 #endif
3262 #endif
3263 }
3264 
3272 static inline float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3273 {
3274 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3275 #ifdef __zcc__
3276  return tpt_yule_distance(src1, src2, numofbool);
3277 #else
3278  return riscv_dsp_bdist_yule_u32_f32(src1, src2, numofbool);
3279 #endif
3280 #endif
3281 }
3282 
3290 static inline float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3291 {
3292 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3293 #ifdef __zcc__
3294  return tpt_russellrao_distance(src1, src2, numofbool);
3295 #else
3296  return riscv_dsp_bdist_russell_rao_u32_f32(src1, src2, numofbool);
3297 #endif
3298 #endif
3299 }
3300 #endif
3301 #endif
3302 
3308 #ifdef HPM_MATH_DSP_FILTERING
3309 
3315 #ifdef HPM_EN_MATH_DSP_LIB
3316 
3317 #ifdef __zcc__
3318 #include "tpt_math.h"
3319 #endif
3320 
3321 #include "riscv_dsp_filtering_math.h"
3322 
3330 static inline void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3331 {
3332 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3333  riscv_dsp_fir_f32(instance, src, dst, size);
3334 #endif
3335 }
3336 
3351 static inline void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3352 {
3353 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3354  riscv_dsp_fir_q31(instance, src, dst, size);
3355 #endif
3356 }
3357 
3372 static inline void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3373 {
3374 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3375  riscv_dsp_fir_fast_q31(instance, src, dst, size);
3376 #endif
3377 }
3392 static inline void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3393 {
3394 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3395  riscv_dsp_fir_q15(instance, src, dst, size);
3396 #endif
3397 }
3398 
3411 static inline void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3412 {
3413 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3414  riscv_dsp_fir_fast_q15(instance, src, dst, size);
3415 #endif
3416 }
3430 static inline void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
3431 {
3432 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3433  riscv_dsp_fir_q7(instance, src, dst, size);
3434 #endif
3435 }
3436 
3445 static inline void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3446 {
3447 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3448  riscv_dsp_lfir_f32(instance, src, dst, size);
3449 #endif
3450 }
3451 
3460 static inline void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3461 {
3462 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3463  riscv_dsp_lfir_q15(instance, src, dst, size);
3464 #endif
3465 }
3466 
3479 static inline void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3480 {
3481 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3482  riscv_dsp_lfir_q31(instance, src, dst, size);
3483 #endif
3484 }
3485 static inline void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3486 {
3487 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3488  riscv_dsp_dcmfir_f32(instance, src, dst, size);
3489 #endif
3490 }
3491 static inline void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3492 {
3493 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3494  riscv_dsp_dcmfir_q15(instance, src, dst, size);
3495 #endif
3496 }
3497 static inline void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3498 {
3499 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3500  riscv_dsp_dcmfir_q31(instance, src, dst, size);
3501 #endif
3502 }
3503 static inline void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3504 {
3505 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3506  riscv_dsp_dcmfir_fast_q31(instance, src, dst, size);
3507 #endif
3508 }
3509 static inline void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3510 {
3511 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3512  riscv_dsp_dcmfir_fast_q15(instance, src, dst, size);
3513 #endif
3514 }
3515 static inline void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3516 {
3517 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3518  riscv_dsp_upsplfir_f32(instance, src, dst, size);
3519 #endif
3520 }
3521 static inline void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3522 {
3523 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3524  riscv_dsp_upsplfir_q15(instance, src, dst, size);
3525 #endif
3526 }
3527 static inline void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3528 {
3529 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3530  riscv_dsp_upsplfir_q31(instance, src, dst, size);
3531 #endif
3532 }
3533 static inline void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
3534 {
3535 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3536  riscv_dsp_spafir_f32(instance, src, dst, buf, size);
3537 #endif
3538 }
3539 static inline void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
3540 {
3541 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3542  riscv_dsp_spafir_q15(instance, src, dst, buf1, buf2, size);
3543 #endif
3544 }
3545 static inline void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
3546 {
3547 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3548  riscv_dsp_spafir_q31(instance, src, dst, buf, size);
3549 #endif
3550 }
3551 static inline void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
3552 {
3553 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3554  riscv_dsp_spafir_q7(instance, src, dst, buf1, buf2, size);
3555 #endif
3556 }
3557 
3558 // Standard LMS filter
3572 static inline void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
3573 {
3574 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3575  riscv_dsp_lms_f32(instance, src, ref, dst, err, size);
3576 #endif
3577 }
3578 
3595 static inline void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
3596 {
3597 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3598  riscv_dsp_lms_q31(instance, src, ref, dst, err, size);
3599 #endif
3600 }
3601 
3618 static inline void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
3619 {
3620 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3621  riscv_dsp_lms_q15(instance, src, ref, dst, err, size);
3622 #endif
3623 }
3624 
3629 static inline void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
3630 {
3631 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3632  riscv_dsp_nlms_f32(instance, src, ref, dst, err, size);
3633 #endif
3634 }
3635 
3636 
3641 static inline void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
3642 {
3643 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3644  riscv_dsp_nlms_q31(instance, src, ref, dst, err, size);
3645 #endif
3646 }
3647 
3648 
3649 static inline void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
3650 {
3651 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3652  riscv_dsp_nlms_q15(instance, src, ref, dst, err, size);
3653 #endif
3654 }
3655 
3656 
3657 // Convolution
3667 static inline void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3668 {
3669 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3670 #ifdef __zcc__
3671  tpt_conv_f32(dst, src1, len1, src2, len2);
3672 #else
3673  riscv_dsp_conv_f32(src1, len1, src2, len2, dst);
3674 #endif
3675 #endif
3676 }
3677 
3693 static inline void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3694 {
3695 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3696 #ifdef __zcc__
3697  tpt_conv_q15(dst, src1, len1, src2, len2);
3698 #else
3699  riscv_dsp_conv_q15(src1, len1, src2, len2, dst);
3700 #endif
3701 #endif
3702 }
3703 
3721 static inline void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3722 {
3723 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3724 #ifdef __zcc__
3725  tpt_conv_q31(dst, src1, len1, src2, len2);
3726 #else
3727  riscv_dsp_conv_q31(src1, len1, src2, len2, dst);
3728 #endif
3729 #endif
3730 }
3731 
3747 static inline void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3748 {
3749 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3750 #ifdef __zcc__
3751  tpt_conv_q7(dst, src1, len1, src2, len2);
3752 #else
3753  riscv_dsp_conv_q7(src1, len1, src2, len2, dst);
3754 #endif
3755 #endif
3756 }
3757 
3772 static inline int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
3773 {
3774 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3775 #ifdef __zcc__
3776  return tpt_conv_partial_f32(dst, src1, len1, src2, len2, startindex, size);
3777 #else
3778  return riscv_dsp_conv_partial_f32(src1, len1, src2, len2, dst, startindex,
3779  size);
3780 #endif
3781 #endif
3782 }
3783 
3798 static inline int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
3799 {
3800 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3801 #ifdef __zcc__
3802  return tpt_conv_partial_q15(dst, src1, len1, src2, len2, startindex, size);
3803 #else
3804  return riscv_dsp_conv_partial_q15(src1, len1, src2, len2, dst, startindex,
3805  size);
3806 #endif
3807 #endif
3808 }
3809 
3824 static inline int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
3825 {
3826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3827 #ifdef __zcc__
3828  return tpt_conv_partial_q31(dst, src1, len1, src2, len2, startindex, size);
3829 #else
3830  return riscv_dsp_conv_partial_q31(src1, len1, src2, len2, dst, startindex,
3831  size);
3832 #endif
3833 #endif
3834 }
3835 
3850 static inline int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
3851 {
3852 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3853 #ifdef __zcc__
3854  return tpt_conv_partial_q7(dst, src1, len1, src2, len2, startindex, size);
3855 #else
3856  return riscv_dsp_conv_partial_q7(src1, len1, src2, len2, dst, startindex,
3857  size);
3858 #endif
3859 #endif
3860 }
3861 
3862 
3863 // Correlation
3873 static inline void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3874 {
3875 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3876 #ifdef __zcc__
3877  tpt_correlate_f32(dst, src1, len1, src2, len2);
3878 #else
3879  riscv_dsp_corr_f32(src1, len1, src2, len2, dst);
3880 #endif
3881 #endif
3882 }
3883 
3899 static inline void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3900 {
3901 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3902 #ifdef __zcc__
3903  tpt_correlate_q15(dst, src1, len1, src2, len2);
3904 #else
3905  riscv_dsp_corr_q15(src1, len1, src2, len2, dst);
3906 #endif
3907 #endif
3908 }
3909 
3929 static inline void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3930 {
3931 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3932 #ifdef __zcc__
3933  tpt_correlate_q31(dst, src1, len1, src2, len2);
3934 #else
3935  riscv_dsp_corr_q31(src1, len1, src2, len2, dst);
3936 #endif
3937 #endif
3938 }
3939 
3955 static inline void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3956 {
3957 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3958 #ifdef __zcc__
3959  tpt_correlate_q7(dst, src1, len1, src2, len2);
3960 #else
3961  riscv_dsp_corr_q7(src1, len1, src2, len2, dst);
3962 #endif
3963 #endif
3964 }
3965 static inline void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3966 {
3967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3968  riscv_dsp_bq_df1_f32(instance, src, dst, size);
3969 #endif
3970 }
3971 static inline void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3972 {
3973 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3974  riscv_dsp_bq_df1_q15(instance, src, dst, size);
3975 #endif
3976 }
3977 static inline void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3978 {
3979 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3980  riscv_dsp_bq_df1_fast_q15(instance, src, dst, size);
3981 #endif
3982 }
3983 static inline void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3984 {
3985 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3986  riscv_dsp_bq_df1_q31(instance, src, dst, size);
3987 #endif
3988 }
3989 static inline void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3990 {
3991 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3992  riscv_dsp_bq_df1_fast_q31(instance, src, dst, size);
3993 #endif
3994 }
3995 static inline void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3996 {
3997 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3998  riscv_dsp_bq_df1_32x64_q31(instance, src, dst, size);
3999 #endif
4000 }
4001 static inline void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
4002 {
4003 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4004  riscv_dsp_bq_df2T_f32(instance, src, dst, size);
4005 #endif
4006 }
4007 static inline void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
4008 {
4009 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4010  riscv_dsp_bq_df2T_f64(instance, src, dst, size);
4011 #endif
4012 }
4013 static inline void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
4014 {
4015 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4016  riscv_dsp_bq_stereo_df2T_f32(instance, src, dst, size);
4017 #endif
4018 }
4019 
4020 static inline void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
4021 {
4022 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4023  riscv_dsp_liir_f32(instance, src, dst, size);
4024 #endif
4025 }
/**
 * q31 variant: forwards (instance, src, dst, size) to riscv_dsp_liir_q31()
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise does nothing.
 */
4026 static inline void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
4027 {
4028 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4029  riscv_dsp_liir_q31(instance, src, dst, size);
4030 #endif
4031 }
/**
 * "fast" q31 variant: forwards to riscv_dsp_liir_fast_q31() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32. Shares the riscv_dsp_liir_q31_t instance
 * type with hpm_dsp_liir_q31().
 */
4032 static inline void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
4033 {
4034 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4035  riscv_dsp_liir_fast_q31(instance, src, dst, size);
4036 #endif
4037 }
/**
 * q15 variant: forwards (instance, src, dst, size) to riscv_dsp_liir_q15()
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise does nothing.
 */
4038 static inline void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
4039 {
4040 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4041  riscv_dsp_liir_q15(instance, src, dst, size);
4042 #endif
4043 }
/**
 * "fast" q15 variant: forwards to riscv_dsp_liir_fast_q15() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32. Shares the riscv_dsp_liir_q15_t instance
 * type with hpm_dsp_liir_q15().
 */
4044 static inline void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
4045 {
4046 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4047  riscv_dsp_liir_fast_q15(instance, src, dst, size);
4048 #endif
4049 }
4050 #endif
4051 #endif
4052 
4058 #ifdef HPM_MATH_DSP_MATRIX
4059 
4083 #ifdef HPM_EN_MATH_DSP_LIB
4084 #ifdef __zcc__
4085 #include "tpt_math.h"
4086 #endif
4087 #include "riscv_dsp_matrix_math.h"
4088 
4089 // Matrix Addition
/**
 * f32 matrix addition wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it picks
 * the backend by toolchain: tpt_mat_add_f32() under __zcc__ (destination is the
 * first argument there), riscv_dsp_mat_add_f32() otherwise.
 * For any other core selection the body is empty.
 */
4098 static inline void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4099 {
4100 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4101 #ifdef __zcc__
4102  tpt_mat_add_f32(dst, src1, src2, row, col);
4103 #else
4104  riscv_dsp_mat_add_f32(src1, src2, dst, row, col);
4105 #endif
4106 #endif
4107 }
4108 
/**
 * f64 matrix addition wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_add_f64(dst, src1, src2, row, col) under __zcc__ and
 * riscv_dsp_mat_add_f64(src1, src2, dst, row, col) otherwise.
 */
4117  static inline void hpm_dsp_mat_add_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
4118  {
4119  #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4120  #ifdef __zcc__
4121  tpt_mat_add_f64(dst, src1, src2, row, col);
4122  #else
4123  riscv_dsp_mat_add_f64(src1, src2, dst, row, col);
4124  #endif
4125  #endif
4126  }
4127 
/**
 * q15 matrix addition wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_add_q15() under __zcc__ (dst first) and riscv_dsp_mat_add_q15() otherwise.
 */
4138 static inline void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4139 {
4140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4141 #ifdef __zcc__
4142  tpt_mat_add_q15(dst, src1, src2, row, col);
4143 #else
4144  riscv_dsp_mat_add_q15(src1, src2, dst, row, col);
4145 #endif
4146 #endif
4147 }
4148 
/**
 * q31 matrix addition wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_add_q31() under __zcc__ (dst first) and riscv_dsp_mat_add_q31() otherwise.
 */
4159 static inline void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4160 {
4161 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4162 #ifdef __zcc__
4163  tpt_mat_add_q31(dst, src1, src2, row, col);
4164 #else
4165  riscv_dsp_mat_add_q31(src1, src2, dst, row, col);
4166 #endif
4167 #endif
4168 }
4169 
4170 // Matrix Inverse
/**
 * f32 matrix inverse wrapper. Returns the backend's int32_t result:
 * tpt_mat_inverse_f32(dst, src, size) under __zcc__, otherwise
 * riscv_dsp_mat_inv_f32(src, dst, size). The meaning of the status value is
 * defined by the backend (not visible here).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
4178 static inline int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
4179 {
4180 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4181 #ifdef __zcc__
4182  return tpt_mat_inverse_f32(dst, src, size);
4183 #else
4184  return riscv_dsp_mat_inv_f32(src, dst, size);
4185 #endif
4186 #endif
4187 }
/**
 * f64 matrix inverse wrapper. Returns tpt_mat_inverse_f64(dst, src, size)
 * under __zcc__, otherwise riscv_dsp_mat_inv_f64(src, dst, size).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
4188 static inline int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
4189 {
4190 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4191 #ifdef __zcc__
4192  return tpt_mat_inverse_f64(dst, src, size);
4193 #else
4194  return riscv_dsp_mat_inv_f64(src, dst, size);
4195 #endif
4196 #endif
4197 }
4198 
4199 // Matrix Multiplication
4209 static inline void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4210 {
4211 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4212 #ifdef __zcc__
4213  return tpt_mat_mult_f32(dst, src1, src2, row, col, col2);
4214 #else
4215  riscv_dsp_mat_mul_f32(src1, src2, dst, row, col, col2);
4216 #endif
4217 #endif
4218 }
4219 
4220 static inline void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4221 {
4222 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4223 #ifdef __zcc__
4224  return tpt_mat_mult_f64(dst, src1, src2, row, col, col2);
4225 #else
4226  riscv_dsp_mat_mul_f64(src1, src2, dst, row, col, col2);
4227 #endif
4228 #endif
4229 }
4230 
4240 static inline void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4241 {
4242 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4243 #ifdef __zcc__
4244  return tpt_mat_cmplx_mult_f32(dst, src1, src2, row, col, col2);
4245 #else
4246  riscv_dsp_cmat_mul_f32(src1, src2, dst, row, col, col2);
4247 #endif
4248 #endif
4249 }
4250 
4267 static inline void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4268 {
4269 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4270 #ifdef __zcc__
4271  return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4272 #else
4273  riscv_dsp_mat_mul_q15(src1, src2, dst, row, col, col2);
4274 #endif
4275 #endif
4276 }
4277 static inline void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4278 {
4279 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4280 #ifdef __zcc__
4281  return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4282 #else
4283  riscv_dsp_mat_mul_fast_q15(src1, src2, dst, row, col, col2);
4284 #endif
4285 #endif
4286 }
4287 
4304 static inline void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4305 {
4306 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4307 #ifdef __zcc__
4308  return tpt_mat_cmplx_mult_q15(dst, src1, src2, row, col, col2);
4309 #else
4310  riscv_dsp_cmat_mul_q15(src1, src2, dst, row, col, col2);
4311 #endif
4312 #endif
4313 }
4314 
4331 static inline void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4332 {
4333 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4334 #ifdef __zcc__
4335  return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4336 #else
4337  riscv_dsp_mat_mul_q31(src1, src2, dst, row, col, col2);
4338 #endif
4339 #endif
4340 }
4341 static inline void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4342 {
4343 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4344 #ifdef __zcc__
4345  return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4346 #else
4347  riscv_dsp_mat_mul_fast_q31(src1, src2, dst, row, col, col2);
4348 #endif
4349 #endif
4350 }
4351 
4368 static inline void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4369 {
4370 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4371 #ifdef __zcc__
4372  return tpt_mat_cmplx_mult_q31(dst, src1, src2, row, col, col2);
4373 #else
4374  riscv_dsp_cmat_mul_q31(src1, src2, dst, row, col, col2);
4375 #endif
4376 #endif
4377 }
4378 
/**
 * q7 matrix multiplication wrapper: forwards (src1, src2, dst, row, col, col2)
 * to riscv_dsp_mat_mul_q7() when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * Unlike the wider-type multiply wrappers there is no __zcc__ branch here.
 */
4395 static inline void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4396 {
4397 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4398  riscv_dsp_mat_mul_q7(src1, src2, dst, row, col, col2);
4399 #endif
4400 }
4401 
/**
 * q7 "vxm" multiply wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * riscv_dsp_mat_mul_vxm_q7(src1, src2, dst, col, col2), or under __zcc__
 * tpt_mat_mul_mxv_q7(dst, src1, src2, col, col2).
 * NOTE(review): the __zcc__ branch uses an "mxv" routine, the same one
 * hpm_dsp_mat_mul_mxv_q7() calls, while this wrapper is named "vxm". Confirm
 * against the tpt_math.h documentation that the operand order and dimensions
 * are equivalent.
 */
4417 static inline void hpm_dsp_mat_mul_vxm_q7(const q7_t * src1, const q7_t * src2, q7_t * dst, uint32_t col, uint32_t col2)
4418 {
4419 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4420 #ifdef __zcc__
4421  tpt_mat_mul_mxv_q7(dst, src1, src2, col, col2);
4422 #else
4423  riscv_dsp_mat_mul_vxm_q7(src1, src2, dst, col, col2);
4424 #endif
4425 #endif
4426 }
4427 
4428 // Matrix Power 2 Function
4429 //
4430 // The input is a square matrix for riscv_dsp_mat_pwr2_cache_f64.
/**
 * Returns riscv_dsp_mat_pwr2_cache_f64(src, dst, size) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32. The meaning of the int32_t result is
 * defined by the backend (not visible here).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
4431 static inline int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
4432 {
4433 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4434  return riscv_dsp_mat_pwr2_cache_f64(src, dst, size);
4435 #endif
4436 }
4437 
4438 // Matrix Scale
/**
 * f32 matrix scaling wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * riscv_dsp_mat_scale_f32(src, scale, dst, row, col), or under __zcc__
 * tpt_mat_scale_f32(dst, src, row, col, scale). Note the different argument
 * order: in the tpt call the scale factor is last.
 */
4447 static inline void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
4448 {
4449 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4450 #ifdef __zcc__
4451  tpt_mat_scale_f32(dst, src, row, col, scale);
4452 #else
4453  riscv_dsp_mat_scale_f32(src, scale, dst, row, col);
4454 #endif
4455 #endif
4456 }
4457 
/**
 * q15 matrix scaling wrapper taking a fractional scale plus a shift. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * riscv_dsp_mat_scale_q15(src, scale_fract, shift, dst, row, col), or under
 * __zcc__ tpt_mat_scale_q15(dst, src, row, col, scale_fract, shift).
 * NOTE(review): how scale_fract and shift combine is defined by the backend -- confirm there.
 */
4472 static inline void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
4473 {
4474 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4475 #ifdef __zcc__
4476  tpt_mat_scale_q15(dst, src, row, col, scale_fract, shift);
4477 #else
4478  riscv_dsp_mat_scale_q15(src, scale_fract, shift, dst, row, col);
4479 #endif
4480 #endif
4481 }
4482 
/**
 * q31 matrix scaling wrapper taking a fractional scale plus a shift. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * riscv_dsp_mat_scale_q31(src, scale_fract, shift, dst, row, col), or under
 * __zcc__ tpt_mat_scale_q31(dst, src, row, col, scale_fract, shift).
 */
4497 static inline void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
4498 {
4499 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4500 #ifdef __zcc__
4501  tpt_mat_scale_q31(dst, src, row, col, scale_fract, shift);
4502 #else
4503  riscv_dsp_mat_scale_q31(src, scale_fract, shift, dst, row, col);
4504 #endif
4505 #endif
4506 }
4507 
4508 // Matrix Subtraction
4509 
/**
 * f64 matrix subtraction wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it
 * calls tpt_mat_sub_f64() under __zcc__ (dst first) and riscv_dsp_mat_sub_f64()
 * otherwise; src1/src2 keep their order in both calls.
 */
4519 static inline void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2,
4520  float64_t *dst, uint32_t row, uint32_t col)
4521 {
4522 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4523 #ifdef __zcc__
4524  tpt_mat_sub_f64(dst, src1, src2, row, col);
4525 #else
4526  riscv_dsp_mat_sub_f64(src1, src2, dst, row, col);
4527 #endif
4528 #endif
4529 }
4530 
/**
 * f32 matrix subtraction wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it
 * calls tpt_mat_sub_f32() under __zcc__ (dst first) and riscv_dsp_mat_sub_f32()
 * otherwise.
 */
4539 static inline void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4540 {
4541 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4542 #ifdef __zcc__
4543  tpt_mat_sub_f32(dst, src1, src2, row, col);
4544 #else
4545  riscv_dsp_mat_sub_f32(src1, src2, dst, row, col);
4546 #endif
4547 #endif
4548 }
4549 
/**
 * q15 matrix subtraction wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it
 * calls tpt_mat_sub_q15() under __zcc__ (dst first) and riscv_dsp_mat_sub_q15()
 * otherwise.
 */
4560 static inline void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4561 {
4562 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4563 #ifdef __zcc__
4564  tpt_mat_sub_q15(dst, src1, src2, row, col);
4565 #else
4566  riscv_dsp_mat_sub_q15(src1, src2, dst, row, col);
4567 #endif
4568 #endif
4569 }
4570 
/**
 * q31 matrix subtraction wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it
 * calls tpt_mat_sub_q31() under __zcc__ (dst first) and riscv_dsp_mat_sub_q31()
 * otherwise.
 */
4581 static inline void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4582 {
4583 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4584 #ifdef __zcc__
4585  tpt_mat_sub_q31(dst, src1, src2, row, col);
4586 #else
4587  riscv_dsp_mat_sub_q31(src1, src2, dst, row, col);
4588 #endif
4589 #endif
4590 }
4591 
4592 // Matrix Transpose
4593 
/**
 * f64 matrix transpose wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_trans_f64(dst, src, row, col) under __zcc__ and
 * riscv_dsp_mat_trans_f64(src, dst, row, col) otherwise.
 */
4602 static inline void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
4603 {
4604 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4605 #ifdef __zcc__
4606  tpt_mat_trans_f64(dst, src, row, col);
4607 #else
4608  riscv_dsp_mat_trans_f64(src, dst, row, col);
4609 #endif
4610 #endif
4611 }
4612 
/**
 * f32 matrix transpose wrapper: forwards (src, dst, row, col) to
 * riscv_dsp_mat_trans_f32() when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * Unlike the f64/q15/q31 transposes, this one has no __zcc__ branch.
 */
4620 static inline void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
4621 {
4622 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4623  riscv_dsp_mat_trans_f32(src, dst, row, col);
4624 #endif
4625 }
4626 
/**
 * q15 matrix transpose wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_trans_q15(dst, src, row, col) under __zcc__ and
 * riscv_dsp_mat_trans_q15(src, dst, row, col) otherwise.
 */
4634 static inline void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
4635 {
4636 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4637 #ifdef __zcc__
4638  tpt_mat_trans_q15(dst, src, row, col);
4639 #else
4640  riscv_dsp_mat_trans_q15(src, dst, row, col);
4641 #endif
4642 #endif
4643 }
4644 
/**
 * q31 matrix transpose wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_trans_q31(dst, src, row, col) under __zcc__ and
 * riscv_dsp_mat_trans_q31(src, dst, row, col) otherwise.
 */
4652 static inline void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
4653 {
4654 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4655 #ifdef __zcc__
4656  tpt_mat_trans_q31(dst, src, row, col);
4657 #else
4658  riscv_dsp_mat_trans_q31(src, dst, row, col);
4659 #endif
4660 #endif
4661 }
4662 
/**
 * uint8_t matrix transpose wrapper: forwards (src, dst, row, col) to
 * riscv_dsp_mat_trans_u8() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
4670 static inline void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst, uint32_t row, uint32_t col)
4671 {
4672 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4673  riscv_dsp_mat_trans_u8(src, dst, row, col);
4674 #endif
4675 }
4676 
/**
 * q7 matrix transpose wrapper: forwards (src, dst, row, col) to
 * riscv_dsp_mat_trans_q7() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
4685 static inline void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
4686 {
4687 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4688  riscv_dsp_mat_trans_q7(src, dst, row, col);
4689 #endif
4690 }
4691 
/**
 * q31 "oprod" wrapper (presumably an outer product of two vectors of lengths
 * size1 and size2 -- confirm with the backend docs). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls tpt_mat_oprod_q31() under __zcc__
 * (dst first) and riscv_dsp_mat_oprod_q31() otherwise.
 */
4723 static inline void hpm_dsp_mat_oprod_q31(const q31_t * src1, const q31_t * src2,
4724  q31_t * dst, uint32_t size1, uint32_t size2)
4725 {
4726 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4727 #ifdef __zcc__
4728  tpt_mat_oprod_q31(dst, src1, src2, size1, size2);
4729 #else
4730  riscv_dsp_mat_oprod_q31(src1, src2, dst, size1, size2);
4731 #endif
4732 #endif
4733 }
4734 
/**
 * f32 "mxv" multiply wrapper (presumably matrix x vector, src1 being the
 * row x col matrix -- confirm with the backend docs). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls tpt_mat_mul_mxv_f32() under
 * __zcc__ (dst first) and riscv_dsp_mat_mul_mxv_f32() otherwise.
 */
4757 static inline void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2,
4758  float32_t *dst, uint32_t row, uint32_t col)
4759 {
4760 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4761 #ifdef __zcc__
4762  tpt_mat_mul_mxv_f32(dst, src1, src2, row, col);
4763 #else
4764  riscv_dsp_mat_mul_mxv_f32(src1, src2, dst, row, col);
4765 #endif
4766 #endif
4767 }
/**
 * q15 "mxv" multiply wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_mul_mxv_q15() under __zcc__ (dst first) and
 * riscv_dsp_mat_mul_mxv_q15() otherwise.
 */
4777 static inline void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2,
4778  q15_t *dst, uint32_t row, uint32_t col)
4779 {
4780 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4781 #ifdef __zcc__
4782  tpt_mat_mul_mxv_q15(dst, src1, src2, row, col);
4783 #else
4784  riscv_dsp_mat_mul_mxv_q15(src1, src2, dst, row, col);
4785 #endif
4786 #endif
4787 }
/**
 * q31 "mxv" multiply wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_mul_mxv_q31() under __zcc__ (dst first) and
 * riscv_dsp_mat_mul_mxv_q31() otherwise.
 */
4797 static inline void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2,
4798  q31_t *dst, uint32_t row, uint32_t col)
4799 {
4800 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4801 #ifdef __zcc__
4802  tpt_mat_mul_mxv_q31(dst, src1, src2, row, col);
4803 #else
4804  riscv_dsp_mat_mul_mxv_q31(src1, src2, dst, row, col);
4805 #endif
4806 #endif
4807 }
/**
 * q7 "mxv" multiply wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls
 * tpt_mat_mul_mxv_q7() under __zcc__ (dst first) and
 * riscv_dsp_mat_mul_mxv_q7() otherwise.
 */
4817 static inline void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2,
4818  q7_t *dst, uint32_t row, uint32_t col)
4819 {
4820 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4821 #ifdef __zcc__
4822  tpt_mat_mul_mxv_q7(dst, src1, src2, row, col);
4823 #else
4824  riscv_dsp_mat_mul_mxv_q7(src1, src2, dst, row, col);
4825 #endif
4826 #endif
4827 }
4828 
4829 #endif
4830 #endif
4831 
4837 #ifdef HPM_MATH_DSP_SVM
4838 
4845 #ifdef HPM_EN_MATH_DSP_LIB
4846 #ifdef __zcc__
4847 #include "tpt_math.h"
4848 #endif
4849 #include "riscv_dsp_svm_math.h"
/**
 * Forwards (instance, src, result) to riscv_dsp_svm_linear_est_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty and *result is
 * left untouched. result is an output pointer owned by the caller.
 */
4857 static inline void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
4858 {
4859 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4860  riscv_dsp_svm_linear_est_f32(instance, src, result);
4861 #endif
4862 }
4863 
/**
 * Forwards (instance, src, result) to riscv_dsp_svm_sigmoid_est_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty and *result is
 * left untouched.
 */
4871 static inline void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
4872 {
4873 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4874  riscv_dsp_svm_sigmoid_est_f32(instance, src, result);
4875 #endif
4876 }
4877 
/**
 * Forwards (instance, src, result) to riscv_dsp_svm_rbf_est_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty and *result is
 * left untouched.
 */
4885 static inline void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
4886 {
4887 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4888  riscv_dsp_svm_rbf_est_f32(instance, src, result);
4889 #endif
4890 }
4891 
/**
 * Forwards (instance, src, result) to riscv_dsp_svm_poly_est_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty and *result is
 * left untouched.
 */
4899 static inline void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
4900 {
4901 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4902  riscv_dsp_svm_poly_est_f32(instance, src, result);
4903 #endif
4904 }
4905 
4906 #endif
4907 #endif
4908 
4914 #ifdef HPM_MATH_DSP_TRANSFORM
4915 
4921 #ifdef HPM_EN_MATH_DSP_LIB
4922 #ifdef __zcc__
4923 #include "tpt_math.h"
4924 #endif
4925 #include "riscv_dsp_transform_math.h"
/**
 * In-place f32 complex FFT wrapper ("rd2" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cfft_rd2_f32(src, m),
 * or under __zcc__ tpt_cfft_f32(src, m, false) (the inverse wrapper passes true).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
4955 static inline int32_t hpm_dsp_cfft_rd2_f32(float32_t *src, uint32_t m)
4956 {
4957 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4958 #ifdef __zcc__
4959  return tpt_cfft_f32(src, m, false);
4960 #else
4961  return riscv_dsp_cfft_rd2_f32(src, m);
4962 #endif
4963 #endif
4964 }
4965 
/**
 * In-place f32 complex inverse FFT wrapper ("rd2" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cifft_rd2_f32(src, m),
 * or under __zcc__ tpt_cfft_f32(src, m, true).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
4973 static inline int32_t hpm_dsp_cifft_rd2_f32(float32_t *src, uint32_t m)
4974 {
4975 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4976 #ifdef __zcc__
4977  return tpt_cfft_f32(src, m, true);
4978 #else
4979  return riscv_dsp_cifft_rd2_f32(src, m);
4980 #endif
4981 
4982 #endif
4983 }
4984 
/**
 * In-place q15 complex FFT wrapper ("rd2" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cfft_rd2_q15(src, m),
 * or under __zcc__ tpt_cfft_q15(src, m, false).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
4998 static inline int32_t hpm_dsp_cfft_rd2_q15(q15_t *src, uint32_t m)
4999 {
5000 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5001 #ifdef __zcc__
5002  return tpt_cfft_q15(src, m, false);
5003 #else
5004  return riscv_dsp_cfft_rd2_q15(src, m);
5005 #endif
5006 #endif
5007 }
5008 
/**
 * In-place q15 complex inverse FFT wrapper ("rd2" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cifft_rd2_q15(src, m),
 * or under __zcc__ tpt_cfft_q15(src, m, true).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5022 static inline int32_t hpm_dsp_cifft_rd2_q15(q15_t *src, uint32_t m)
5023 {
5024 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5025 #ifdef __zcc__
5026  return tpt_cfft_q15(src, m, true);
5027 #else
5028  return riscv_dsp_cifft_rd2_q15(src, m);
5029 #endif
5030 #endif
5031 }
5032 
/**
 * In-place q31 complex FFT wrapper ("rd2" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cfft_rd2_q31(src, m),
 * or under __zcc__ tpt_cfft_q31(src, m, false).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5046 static inline int32_t hpm_dsp_cfft_rd2_q31(q31_t *src, uint32_t m)
5047 {
5048 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5049 #ifdef __zcc__
5050  return tpt_cfft_q31(src, m, false);
5051 #else
5052  return riscv_dsp_cfft_rd2_q31(src, m);
5053 #endif
5054 
5055 #endif
5056 }
5057 
/**
 * In-place q31 complex inverse FFT wrapper ("rd2" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cifft_rd2_q31(src, m),
 * or under __zcc__ tpt_cfft_q31(src, m, true).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5071 static inline int32_t hpm_dsp_cifft_rd2_q31(q31_t *src, uint32_t m)
5072 {
5073 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5074 #ifdef __zcc__
5075  return tpt_cfft_q31(src, m, true);
5076 #else
5077  return riscv_dsp_cifft_rd2_q31(src, m);
5078 #endif
5079 
5080 #endif
5081 }
5082 
/**
 * In-place f32 complex FFT wrapper ("rd4" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cfft_rd4_f32(src, m).
 * Under __zcc__ it calls the same tpt_cfft_f32(src, m, false) as the rd2 wrapper.
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5112 static inline int32_t hpm_dsp_cfft_rd4_f32(float32_t *src, uint32_t m)
5113 {
5114 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5115 #ifdef __zcc__
5116  return tpt_cfft_f32(src, m, false);
5117 #else
5118  return riscv_dsp_cfft_rd4_f32(src, m);
5119 #endif
5120 
5121 #endif
5122 }
5123 
/**
 * In-place f32 complex inverse FFT wrapper ("rd4" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cifft_rd4_f32(src, m).
 * Under __zcc__ it calls the same tpt_cfft_f32(src, m, true) as the rd2 wrapper.
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5131 static inline int32_t hpm_dsp_cifft_rd4_f32(float32_t *src, uint32_t m)
5132 {
5133 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5134 #ifdef __zcc__
5135  return tpt_cfft_f32(src, m, true);
5136 #else
5137  return riscv_dsp_cifft_rd4_f32(src, m);
5138 #endif
5139 #endif
5140 }
5141 
/**
 * In-place q15 complex FFT wrapper ("rd4" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cfft_rd4_q15(src, m),
 * or under __zcc__ tpt_cfft_q15(src, m, false).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5155 static inline int32_t hpm_dsp_cfft_rd4_q15(q15_t *src, uint32_t m)
5156 {
5157 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5158 #ifdef __zcc__
5159  return tpt_cfft_q15(src, m, false);
5160 #else
5161  return riscv_dsp_cfft_rd4_q15(src, m);
5162 #endif
5163 #endif
5164 }
5165 
/**
 * In-place q15 complex inverse FFT wrapper ("rd4" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cifft_rd4_q15(src, m),
 * or under __zcc__ tpt_cfft_q15(src, m, true).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5179 static inline int32_t hpm_dsp_cifft_rd4_q15(q15_t *src, uint32_t m)
5180 {
5181 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5182 #ifdef __zcc__
5183  return tpt_cfft_q15(src, m, true);
5184 #else
5185  return riscv_dsp_cifft_rd4_q15(src, m);
5186 #endif
5187 #endif
5188 }
5189 
/**
 * In-place q31 complex FFT wrapper ("rd4" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cfft_rd4_q31(src, m),
 * or under __zcc__ tpt_cfft_q31(src, m, false).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5203 static inline int32_t hpm_dsp_cfft_rd4_q31(q31_t *src, uint32_t m)
5204 {
5205 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5206 #ifdef __zcc__
5207  return tpt_cfft_q31(src, m, false);
5208 #else
5209  return riscv_dsp_cfft_rd4_q31(src, m);
5210 #endif
5211 #endif
5212 }
5213 
/**
 * In-place q31 complex inverse FFT wrapper ("rd4" variant). With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns riscv_dsp_cifft_rd4_q31(src, m),
 * or under __zcc__ tpt_cfft_q31(src, m, true).
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5227 static inline int32_t hpm_dsp_cifft_rd4_q31(q31_t *src, uint32_t m)
5228 {
5229 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5230 #ifdef __zcc__
5231  return tpt_cfft_q31(src, m, true);
5232 #else
5233  return riscv_dsp_cifft_rd4_q31(src, m);
5234 #endif
5235 #endif
5236 }
5237 
/**
 * In-place f32 complex FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cfft_f32(src, m), or
 * under __zcc__ tpt_cfft_f32(src, m, false), whose result is discarded.
 */
5258 static inline void hpm_dsp_cfft_f32(float32_t *src, uint32_t m)
5259 {
5260 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5261 #ifdef __zcc__
5262  tpt_cfft_f32(src, m, false);
5263 #else
5264  riscv_dsp_cfft_f32(src, m);
5265 #endif
5266 #endif
5267 }
5268 
/**
 * In-place f64 complex FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cfft_f64(src, m), or
 * under __zcc__ tpt_cfft_f64(src, m, false).
 */
5275 static inline void hpm_dsp_cfft_f64(float64_t *src, uint32_t m)
5276 {
5277 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5278 #ifdef __zcc__
5279  tpt_cfft_f64(src, m, false);
5280 #else
5281  riscv_dsp_cfft_f64(src, m);
5282 #endif
5283 #endif
5284 }
5285 
/**
 * In-place f32 complex inverse FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cifft_f32(src, m), or
 * under __zcc__ tpt_cfft_f32(src, m, true).
 */
5292 static inline void hpm_dsp_cifft_f32(float32_t *src, uint32_t m)
5293 {
5294 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5295 #ifdef __zcc__
5296  tpt_cfft_f32(src, m, true);
5297 #else
5298  riscv_dsp_cifft_f32(src, m);
5299 #endif
5300 #endif
5301 }
5302 
/**
 * In-place f64 complex inverse FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cifft_f64(src, m), or
 * under __zcc__ tpt_cfft_f64(src, m, true).
 */
5309 static inline void hpm_dsp_cifft_f64(float64_t *src, uint32_t m)
5310 {
5311 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5312 #ifdef __zcc__
5313  tpt_cfft_f64(src, m, true);
5314 #else
5315  riscv_dsp_cifft_f64(src, m);
5316 #endif
5317 #endif
5318 }
5319 
5320 
/**
 * In-place q15 complex FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cfft_q15(src, m), or
 * under __zcc__ tpt_cfft_q15(src, m, false).
 */
5333 static inline void hpm_dsp_cfft_q15(q15_t *src, uint32_t m)
5334 {
5335 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5336 #ifdef __zcc__
5337  tpt_cfft_q15(src, m, false);
5338 #else
5339  riscv_dsp_cfft_q15(src, m);
5340 #endif
5341 #endif
5342 }
5343 
/**
 * In-place q15 complex inverse FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cifft_q15(src, m), or
 * under __zcc__ tpt_cfft_q15(src, m, true).
 */
5356 static inline void hpm_dsp_cifft_q15(q15_t *src, uint32_t m)
5357 {
5358 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5359 #ifdef __zcc__
5360  tpt_cfft_q15(src, m, true);
5361 #else
5362  riscv_dsp_cifft_q15(src, m);
5363 #endif
5364 #endif
5365 }
5366 
/**
 * In-place q31 complex FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cfft_q31(src, m), or
 * under __zcc__ tpt_cfft_q31(src, m, false).
 */
5379 static inline void hpm_dsp_cfft_q31(q31_t *src, uint32_t m)
5380 {
5381 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5382 #ifdef __zcc__
5383  tpt_cfft_q31(src, m, false);
5384 #else
5385  riscv_dsp_cfft_q31(src, m);
5386 #endif
5387 #endif
5388 }
5389 
/**
 * In-place q31 complex inverse FFT wrapper with no return value. With
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32 it calls riscv_dsp_cifft_q31(src, m), or
 * under __zcc__ tpt_cfft_q31(src, m, true).
 */
5402 static inline void hpm_dsp_cifft_q31(q31_t *src, uint32_t m)
5403 {
5404 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5405 #ifdef __zcc__
5406  tpt_cfft_q31(src, m, true);
5407 #else
5408  riscv_dsp_cifft_q31(src, m);
5409 #endif
5410 #endif
5411 }
5412 
/**
 * f32 real FFT wrapper. With HPM_DSP_CORE == HPM_DSP_HW_NDS32 it returns
 * riscv_dsp_rfft_f32(src, m), or under __zcc__ tpt_rfft_f32(src, src, m, false),
 * where src is passed for both buffer arguments.
 * NOTE: no return statement is compiled when HPM_DSP_CORE != HPM_DSP_HW_NDS32.
 */
5442 static inline int32_t hpm_dsp_rfft_f32(float32_t *src, uint32_t m)
5443 {
5444 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5445 #ifdef __zcc__
5446  return tpt_rfft_f32(src, src, m, false);
5447 #else
5448  return riscv_dsp_rfft_f32(src, m);
5449 #endif
5450 #endif
5451 }
5452 
/**
 * f64 real FFT wrapper: returns riscv_dsp_rfft_f64(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5460 static inline int32_t hpm_dsp_rfft_f64(float64_t *src, uint32_t m)
5461 {
5462 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5463  return riscv_dsp_rfft_f64(src, m);
5464 #endif
5465 }
5466 
/**
 * f32 real inverse FFT wrapper: returns riscv_dsp_rifft_f32(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32. Unlike hpm_dsp_rfft_f32() there is no
 * __zcc__ branch.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5474 static inline int32_t hpm_dsp_rifft_f32(float32_t *src, uint32_t m)
5475 {
5476 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5477  return riscv_dsp_rifft_f32(src, m);
5478 #endif
5479 }
5480 
/**
 * f64 real inverse FFT wrapper: returns riscv_dsp_rifft_f64(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5488 static inline int32_t hpm_dsp_rifft_f64(float64_t *src, uint32_t m)
5489 {
5490 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5491  return riscv_dsp_rifft_f64(src, m);
5492 #endif
5493 }
5494 
/**
 * q15 real FFT wrapper: returns riscv_dsp_rfft_q15(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5508 static inline int32_t hpm_dsp_rfft_q15(q15_t *src, uint32_t m)
5509 {
5510 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5511  return riscv_dsp_rfft_q15(src, m);
5512 #endif
5513 }
5514 
/**
 * q15 real inverse FFT wrapper: returns riscv_dsp_rifft_q15(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5528 static inline int32_t hpm_dsp_rifft_q15(q15_t *src, uint32_t m)
5529 {
5530 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5531  return riscv_dsp_rifft_q15(src, m);
5532 #endif
5533 }
5534 
/**
 * q31 real FFT wrapper: returns riscv_dsp_rfft_q31(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5548 static inline int32_t hpm_dsp_rfft_q31(q31_t *src, uint32_t m)
5549 {
5550 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5551  return riscv_dsp_rfft_q31(src, m);
5552 #endif
5553 }
5554 
/**
 * q31 real inverse FFT wrapper: returns riscv_dsp_rifft_q31(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5568 static inline int32_t hpm_dsp_rifft_q31(q31_t *src, uint32_t m)
5569 {
5570 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5571  return riscv_dsp_rifft_q31(src, m);
5572 #endif
5573 }
5574 
/**
 * f32 DCT wrapper operating on the single buffer src: calls
 * riscv_dsp_dct_f32(src, m) when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
5593 static inline void hpm_dsp_dct_f32(float32_t *src, uint32_t m)
5594 {
5595 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5596  riscv_dsp_dct_f32(src, m);
5597 #endif
5598 }
5599 
/**
 * f32 inverse DCT wrapper: calls riscv_dsp_idct_f32(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5606 static inline void hpm_dsp_idct_f32(float32_t *src, uint32_t m)
5607 {
5608 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5609  riscv_dsp_idct_f32(src, m);
5610 #endif
5611 }
5612 
/**
 * q15 DCT wrapper: calls riscv_dsp_dct_q15(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5625 static inline void hpm_dsp_dct_q15(q15_t *src, uint32_t m)
5626 {
5627 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5628  riscv_dsp_dct_q15(src, m);
5629 #endif
5630 }
5631 
/**
 * q15 inverse DCT wrapper: calls riscv_dsp_idct_q15(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5644 static inline void hpm_dsp_idct_q15(q15_t *src, uint32_t m)
5645 {
5646 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5647  riscv_dsp_idct_q15(src, m);
5648 #endif
5649 }
5650 
/**
 * q31 DCT wrapper: calls riscv_dsp_dct_q31(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5663 static inline void hpm_dsp_dct_q31(q31_t *src, uint32_t m)
5664 {
5665 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5666  riscv_dsp_dct_q31(src, m);
5667 #endif
5668 }
5669 
/**
 * q31 inverse DCT wrapper: calls riscv_dsp_idct_q31(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5682 static inline void hpm_dsp_idct_q31(q31_t *src, uint32_t m)
5683 {
5684 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5685  riscv_dsp_idct_q31(src, m);
5686 #endif
5687 }
5688 
/**
 * f32 "dct4" wrapper (presumably DCT type IV -- confirm with the backend docs):
 * calls riscv_dsp_dct4_f32(src, m) when HPM_DSP_CORE == HPM_DSP_HW_NDS32;
 * otherwise the body is empty.
 */
5707 static inline void hpm_dsp_dct4_f32(float32_t *src, uint32_t m)
5708 {
5709 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5710  riscv_dsp_dct4_f32(src, m);
5711 #endif
5712 }
5713 
/**
 * f32 "idct4" wrapper: calls riscv_dsp_idct4_f32(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5720 static inline void hpm_dsp_idct4_f32(float32_t *src, uint32_t m)
5721 {
5722 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5723  riscv_dsp_idct4_f32(src, m);
5724 #endif
5725 }
5726 
/**
 * q15 "dct4" wrapper: calls riscv_dsp_dct4_q15(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5739 static inline void hpm_dsp_dct4_q15(q15_t *src, uint32_t m)
5740 {
5741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5742  riscv_dsp_dct4_q15(src, m);
5743 #endif
5744 }
5745 
/**
 * q15 "idct4" wrapper: calls riscv_dsp_idct4_q15(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5758 static inline void hpm_dsp_idct4_q15(q15_t *src, uint32_t m)
5759 {
5760 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5761  riscv_dsp_idct4_q15(src, m);
5762 #endif
5763 }
5764 
/**
 * q31 "dct4" wrapper: calls riscv_dsp_dct4_q31(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5777 static inline void hpm_dsp_dct4_q31(q31_t *src, uint32_t m)
5778 {
5779 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5780  riscv_dsp_dct4_q31(src, m);
5781 #endif
5782 }
5783 
/**
 * q31 "idct4" wrapper: calls riscv_dsp_idct4_q31(src, m) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
5796 static inline void hpm_dsp_idct4_q31(q31_t *src, uint32_t m)
5797 {
5798 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5799  riscv_dsp_idct4_q31(src, m);
5800 #endif
5801 }
5802 
/**
 * Declaration only; the definition lives outside this header. Unlike the
 * wrappers around it, it is a non-inline external function taking plain float.
 * NOTE(review): presumably a software complex FFT over 2^m points performed
 * in place on src -- confirm against the implementation.
 */
5814 void hpm_software_cfft_float(float *src, uint32_t m);
5815 
5816 #endif
5817 
5818 #if defined(HPMSOC_HAS_HPMSDK_FFA) && defined(HPM_EN_MATH_DSP_LIB)
5819 
5820 #include "hpm_ffa_drv.h"
5821 #include "hpm_soc.h"
/**
 * Fills an fft_xfer_t for an in-place (src == dst) forward (is_ifft = false)
 * transform of 1 << m points.
 * NOTE(review): as shown here the descriptor is never handed to the FFA driver
 * and no data type is set. The listing numbering skips 5841-5843, so lines
 * appear to be missing from this view -- confirm against the upstream header.
 * NOTE: `1 << m` is evaluated as a signed int; large m (>= 31 with a 32-bit
 * int) is undefined.
 */
5834 static inline void hpm_ffa_cfft_q15(q15_t *src, uint32_t m)
5835 {
5836  fft_xfer_t xfer = { 0 };
5837  xfer.num_points = 1 << m;
5838  xfer.src = src;
5839  xfer.dst = src;
5840  xfer.is_ifft = false;
5844 }
/**
 * Fills an fft_xfer_t for an in-place (src == dst) forward (is_ifft = false)
 * q31 transform of 1 << m points.
 * NOTE(review): as shown here the descriptor is never handed to the FFA driver.
 * The listing numbering skips 5860-5862, so lines appear to be missing from
 * this view -- confirm against the upstream header.
 * NOTE: `1 << m` is evaluated as a signed int; large m is undefined.
 */
5853 static inline void hpm_ffa_cfft_q31(q31_t *src, uint32_t m)
5854 {
5855  fft_xfer_t xfer = { 0 };
5856  xfer.num_points = 1 << m;
5857  xfer.src = src;
5858  xfer.dst = src;
5859  xfer.is_ifft = false;
5863 }
5864 
5865 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
/**
 * FP32 forward FFT setup for the FFA peripheral. Only compiled when
 * HPM_IP_FEATURE_FFA_FP32 is defined and non-zero (enclosing #if).
 * Fills an in-place descriptor of 1 << m complex FP32 points, enables the FP
 * bias on HPM_FFA, and sets the coefficient / output / input max indices to
 * 0 / 20 / (20 - m).
 * NOTE(review): `20 - m` is computed in uint32_t, so m > 20 wraps around.
 * NOTE(review): the listing numbering skips 5879; the call that submits xfer
 * is not visible in this view -- confirm against the upstream header.
 */
5866 static inline void hpm_ffa_cfft_f32(float *src, uint32_t m)
5867 {
5868  fft_xfer_t xfer = { 0 };
5869  xfer.num_points = 1 << m;
5870  xfer.src = src;
5871  xfer.dst = src;
5872  xfer.is_ifft = false;
5873  xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5874  xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5875  ffa_enable_fp_bias(HPM_FFA);
5876  ffa_set_coef_max_index(HPM_FFA, 0);
5877  ffa_set_output_max_index(HPM_FFA, 20);
5878  ffa_set_input_max_index(HPM_FFA, 20 - m);
5880 }
5881 #endif
/**
 * Fills an fft_xfer_t for an in-place (src == dst) inverse (is_ifft = true)
 * q15 transform of 1 << m points.
 * NOTE(review): as shown here the descriptor is never handed to the FFA driver.
 * The listing numbering skips 5897-5899, so lines appear to be missing from
 * this view -- confirm against the upstream header.
 * NOTE: `1 << m` is evaluated as a signed int; large m is undefined.
 */
5890 static inline void hpm_ffa_cifft_q15(q15_t *src, uint32_t m)
5891 {
5892  fft_xfer_t xfer = { 0 };
5893  xfer.num_points = 1 << m;
5894  xfer.src = src;
5895  xfer.dst = src;
5896  xfer.is_ifft = true;
5900 }
5901 
/**
 * Fills an fft_xfer_t for an in-place (src == dst) inverse (is_ifft = true)
 * q31 transform of 1 << m points.
 * NOTE(review): as shown here the descriptor is never handed to the FFA driver.
 * The listing numbering skips 5917-5919, so lines appear to be missing from
 * this view -- confirm against the upstream header.
 * NOTE: `1 << m` is evaluated as a signed int; large m is undefined.
 */
5910 static inline void hpm_ffa_cifft_q31(q31_t *src, uint32_t m)
5911 {
5912  fft_xfer_t xfer = { 0 };
5913  xfer.num_points = 1 << m;
5914  xfer.src = src;
5915  xfer.dst = src;
5916  xfer.is_ifft = true;
5920 }
5921 
5922 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
/**
 * FP32 inverse FFT setup for the FFA peripheral. Only compiled when
 * HPM_IP_FEATURE_FFA_FP32 is defined and non-zero (enclosing #if).
 * Fills an in-place descriptor of 1 << m complex FP32 points with
 * is_ifft = true, enables the FP bias on HPM_FFA, and sets the coefficient /
 * output / input max indices to 0x0 / 10 / 20.
 * NOTE(review): these indices are fixed and do not depend on m, unlike the
 * forward hpm_ffa_cfft_f32() which uses 20 and 20 - m -- confirm this is intended.
 * NOTE(review): the listing numbering skips 5936; the call that submits xfer
 * is not visible in this view -- confirm against the upstream header.
 */
5923 static inline void hpm_ffa_cifft_f32(float *src, uint32_t m)
5924 {
5925  fft_xfer_t xfer = { 0 };
5926  xfer.num_points = 1 << m;
5927  xfer.src = src;
5928  xfer.dst = src;
5929  xfer.is_ifft = true;
5930  xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5931  xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5932  ffa_enable_fp_bias(HPM_FFA);
5933  ffa_set_coef_max_index(HPM_FFA, 0x0);
5934  ffa_set_output_max_index(HPM_FFA, 10);
5935  ffa_set_input_max_index(HPM_FFA, 20);
5937 }
5938 #endif
5939 
5940 #endif
5941 
5942 #endif
5943 
5949 #ifdef HPM_MATH_DSP_UTILS
5950 
5959 #ifdef HPM_EN_MATH_DSP_LIB
5960 #ifdef __zcc__
5961 #include <tpt_math.h>
5962 #endif
5963 #include "riscv_dsp_utils_math.h"
5964 // Cosine and Sine
/**
 * Returns riscv_dsp_cos_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE(review): the angle unit of src is defined by the backend -- confirm there.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5965 static inline float32_t hpm_dsp_cos_f32(float32_t src)
5966 {
5967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5968  return riscv_dsp_cos_f32(src);
5969 #endif
5970 }
/**
 * Returns riscv_dsp_cos_q31(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE(review): the fixed-point angle scaling of src is defined by the backend -- confirm there.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5971 static inline q31_t hpm_dsp_cos_q31(q31_t src)
5972 {
5973 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5974  return riscv_dsp_cos_q31(src);
5975 #endif
5976 }
/**
 * Returns riscv_dsp_cos_q15(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5977 static inline q15_t hpm_dsp_cos_q15(q15_t src)
5978 {
5979 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5980  return riscv_dsp_cos_q15(src);
5981 #endif
5982 }
5983 
/**
 * Returns riscv_dsp_sin_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5984 static inline float32_t hpm_dsp_sin_f32(float32_t src)
5985 {
5986 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5987  return riscv_dsp_sin_f32(src);
5988 #endif
5989 }
5990 
5991 #if defined (__riscv_zfh)
/**
 * Half-precision sine wrapper, only compiled when __riscv_zfh is defined
 * (enclosing #if). Returns riscv_dsp_sin_f16(src) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
5996 static inline float16_t hpm_dsp_sin_f16(float16_t src)
5997 {
5998 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5999  return riscv_dsp_sin_f16(src);
6000 #endif
6001 }
6002 #endif
6003 
/**
 * Returns riscv_dsp_sin_q31(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6004 static inline q31_t hpm_dsp_sin_q31(q31_t src)
6005 {
6006 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6007  return riscv_dsp_sin_q31(src);
6008 #endif
6009 }
/**
 * Returns riscv_dsp_sin_q15(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6010 static inline q15_t hpm_dsp_sin_q15(q15_t src)
6011 {
6012 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6013  return riscv_dsp_sin_q15(src);
6014 #endif
6015 }
6016 
6017 // Arc tangent
/**
 * Returns riscv_dsp_atan_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6018 static inline float32_t hpm_dsp_atan_f32(float32_t src)
6019 {
6020 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6021  return riscv_dsp_atan_f32(src);
6022 #endif
6023 }
/**
 * Returns riscv_dsp_atan_q31(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6024 static inline q31_t hpm_dsp_atan_q31(q31_t src)
6025 {
6026 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6027  return riscv_dsp_atan_q31(src);
6028 #endif
6029 }
/**
 * Returns riscv_dsp_atan_q15(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6030 static inline q15_t hpm_dsp_atan_q15(q15_t src)
6031 {
6032 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6033  return riscv_dsp_atan_q15(src);
6034 #endif
6035 }
/**
 * Two-argument arctangent wrapper: returns riscv_dsp_atan2_f32(srcy, src2)
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32. srcy is passed first, src2 second
 * (presumably the y and x coordinates respectively -- confirm with the backend docs).
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6036 static inline float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
6037 {
6038 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6039  return riscv_dsp_atan2_f32(srcy, src2);
6040 #endif
6041 }
/**
 * q15 two-argument arctangent wrapper: returns riscv_dsp_atan2_q15(srcy, src2)
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6042 static inline q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
6043 {
6044 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6045  return riscv_dsp_atan2_q15(srcy, src2);
6046 #endif
6047 }
/**
 * q31 two-argument arctangent wrapper: returns riscv_dsp_atan2_q31(srcy, src2)
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6048 static inline q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
6049 {
6050 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6051  return riscv_dsp_atan2_q31(srcy, src2);
6052 #endif
6053 }
6054 
6055 // Square Root
/**
 * Returns riscv_dsp_sqrt_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * Behaviour for negative src is defined by the backend (not visible here).
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6061 static inline float32_t hpm_dsp_sqrt_f32(float32_t src)
6062 {
6063 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6064  return riscv_dsp_sqrt_f32(src);
6065 #endif
6066 }
6067 
/**
 * Returns riscv_dsp_sqrt_q31(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6073 static inline q31_t hpm_dsp_sqrt_q31(q31_t src)
6074 {
6075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6076  return riscv_dsp_sqrt_q31(src);
6077 #endif
6078 }
6079 
/**
 * Returns riscv_dsp_sqrt_q15(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 * NOTE: no return statement is compiled for any other HPM_DSP_CORE value.
 */
6085 static inline q15_t hpm_dsp_sqrt_q15(q15_t src)
6086 {
6087 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6088  return riscv_dsp_sqrt_q15(src);
6089 #endif
6090 }
6091 
6092 // Convert function
/**
 * Array conversion wrapper, f32 -> q15: forwards (src, dst, size) to
 * riscv_dsp_convert_f32_q15() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty. Rounding and saturation are defined by the backend.
 */
6099 static inline void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
6100 {
6101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6102  riscv_dsp_convert_f32_q15(src, dst, size);
6103 #endif
6104 }
6105 
/**
 * Array conversion wrapper, f32 -> q31. With HPM_DSP_CORE == HPM_DSP_HW_NDS32
 * it calls riscv_dsp_convert_f32_q31(src, dst, size), or under __zcc__
 * tpt_f32_to_q31(dst, src, size) (destination first).
 */
6112 static inline void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
6113 {
6114 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6115 #ifdef __zcc__
6116  tpt_f32_to_q31(dst, src, size);
6117 #else
6118  riscv_dsp_convert_f32_q31(src, dst, size);
6119 #endif
6120 #endif
6121 }
6122 
/**
 * Array conversion wrapper, f32 -> q7: forwards (src, dst, size) to
 * riscv_dsp_convert_f32_q7() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6129 static inline void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
6130 {
6131 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6132  riscv_dsp_convert_f32_q7(src, dst, size);
6133 #endif
6134 }
6135 
/**
 * Array conversion wrapper, q15 -> f32: forwards (src, dst, size) to
 * riscv_dsp_convert_q15_f32() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6142 static inline void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
6143 {
6144 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6145  riscv_dsp_convert_q15_f32(src, dst, size);
6146 #endif
6147 }
6148 
/**
 * Array conversion wrapper, q15 -> q31: forwards (src, dst, size) to
 * riscv_dsp_convert_q15_q31() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6155 static inline void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
6156 {
6157 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6158  riscv_dsp_convert_q15_q31(src, dst, size);
6159 #endif
6160 }
6161 
/**
 * Array conversion wrapper, q15 -> q7: forwards (src, dst, size) to
 * riscv_dsp_convert_q15_q7() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6168 static inline void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
6169 {
6170 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6171  riscv_dsp_convert_q15_q7(src, dst, size);
6172 #endif
6173 }
6174 
/**
 * Array conversion wrapper, q31 -> f32. With HPM_DSP_CORE == HPM_DSP_HW_NDS32
 * it calls riscv_dsp_convert_q31_f32(src, dst, size), or under __zcc__
 * tpt_q31_to_f32(dst, src, size) (destination first).
 */
6181 static inline void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
6182 {
6183 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6184 #ifdef __zcc__
6185  tpt_q31_to_f32(dst, src, size);
6186 #else
6187  riscv_dsp_convert_q31_f32(src, dst, size);
6188 #endif
6189 #endif
6190 }
6191 
/**
 * Array conversion wrapper, q31 -> q15: forwards (src, dst, size) to
 * riscv_dsp_convert_q31_q15() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6198 static inline void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
6199 {
6200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6201  riscv_dsp_convert_q31_q15(src, dst, size);
6202 #endif
6203 }
6204 
/**
 * Array conversion wrapper, q31 -> q7: forwards (src, dst, size) to
 * riscv_dsp_convert_q31_q7() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6211 static inline void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
6212 {
6213 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6214  riscv_dsp_convert_q31_q7(src, dst, size);
6215 #endif
6216 }
6217 
/**
 * Array conversion wrapper, q7 -> f32: forwards (src, dst, size) to
 * riscv_dsp_convert_q7_f32() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6224 static inline void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
6225 {
6226 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6227  riscv_dsp_convert_q7_f32(src, dst, size);
6228 #endif
6229 }
6230 
/**
 * Array conversion wrapper, q7 -> q15: forwards (src, dst, size) to
 * riscv_dsp_convert_q7_q15() when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise
 * the body is empty.
 */
6237 static inline void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
6238 {
6239 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6240  riscv_dsp_convert_q7_q15(src, dst, size);
6241 #endif
6242 }
6243 
/**
 * @brief q7_t to q31_t buffer conversion wrapper.
 *
 * Passes (src, dst, size) unchanged to riscv_dsp_convert_q7_q31() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param src q7_t source buffer
 * @param dst q31_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6250 static inline void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
6251 {
6252 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6253  riscv_dsp_convert_q7_q31(src, dst, size);
6254 #endif
6255 }
6256 
6257 // Duplicate function
/**
 * @brief float32_t buffer duplication wrapper.
 *
 * Passes (src, dst, size) unchanged to riscv_dsp_dup_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param src float32_t source buffer
 * @param dst float32_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6264 static inline void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
6265 {
6266 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6267  riscv_dsp_dup_f32(src, dst, size);
6268 #endif
6269 }
6270 
/**
 * @brief q15_t buffer duplication wrapper.
 *
 * Passes (src, dst, size) unchanged to riscv_dsp_dup_q15() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param src q15_t source buffer
 * @param dst q15_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6277 static inline void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
6278 {
6279 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6280  riscv_dsp_dup_q15(src, dst, size);
6281 #endif
6282 }
6283 
/**
 * @brief q31_t buffer duplication wrapper.
 *
 * Passes (src, dst, size) unchanged to riscv_dsp_dup_q31() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param src q31_t source buffer
 * @param dst q31_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6290 static inline void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
6291 {
6292 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6293  riscv_dsp_dup_q31(src, dst, size);
6294 #endif
6295 }
6296 
/**
 * @brief q7_t buffer duplication wrapper.
 *
 * Passes (src, dst, size) unchanged to riscv_dsp_dup_q7() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param src q7_t source buffer
 * @param dst q7_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6303 static inline void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
6304 {
6305 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6306  riscv_dsp_dup_q7(src, dst, size);
6307 #endif
6308 }
6309 
6310 // Set function
/**
 * @brief float32_t buffer fill wrapper.
 *
 * Passes (val, dst, size) unchanged to riscv_dsp_set_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param val float32_t value handed to the backend
 * @param dst float32_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6317 static inline void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
6318 {
6319 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6320  riscv_dsp_set_f32(val, dst, size);
6321 #endif
6322 }
6323 
/**
 * @brief q15_t buffer fill wrapper.
 *
 * Passes (val, dst, size) unchanged to riscv_dsp_set_q15() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param val q15_t value handed to the backend
 * @param dst q15_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6330 static inline void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
6331 {
6332 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6333  riscv_dsp_set_q15(val, dst, size);
6334 #endif
6335 }
6336 
/**
 * @brief q31_t buffer fill wrapper.
 *
 * Passes (val, dst, size) unchanged to riscv_dsp_set_q31() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param val q31_t value handed to the backend
 * @param dst q31_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6343 static inline void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
6344 {
6345 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6346  riscv_dsp_set_q31(val, dst, size);
6347 #endif
6348 }
6349 
/**
 * @brief q7_t buffer fill wrapper.
 *
 * Passes (val, dst, size) unchanged to riscv_dsp_set_q7() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param val q7_t value handed to the backend
 * @param dst q7_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6356 static inline void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
6357 {
6358 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6359  riscv_dsp_set_q7(val, dst, size);
6360 #endif
6361 }
6362 
/**
 * @brief Weighted-sum wrapper for float32_t buffers.
 *
 * Returns the result of riscv_dsp_weighted_sum_f32(src, weight, size) when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src float32_t input buffer (read-only)
 * @param weight float32_t weight buffer (read-only)
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 * @return value produced by the backend
 */
6371 static inline float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
6372 {
6373 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6374  return riscv_dsp_weighted_sum_f32(src, weight, size);
6375 #endif
6376 }
6377 
/**
 * @brief Barycenter wrapper for float32_t vectors.
 *
 * Passes all five arguments unchanged to riscv_dsp_barycenter_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param src float32_t input data (read-only)
 * @param weights float32_t weights (read-only)
 * @param out float32_t output buffer
 * @param numofvec forwarded as-is; presumably the number of vectors (confirm in the backend header)
 * @param dimofvec forwarded as-is; presumably the dimension of each vector (confirm in the backend header)
 */
6387 static inline void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
6388 {
6389 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6390  riscv_dsp_barycenter_f32(src, weights, out, numofvec, dimofvec);
6391 #endif
6392 }
6393 
/**
 * @brief Scalar float32_t wrapper around riscv_dsp_exp_f32().
 *
 * Returns riscv_dsp_exp_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src input value
 * @return value produced by the backend
 */
6399 static inline float32_t hpm_dsp_exp_f32(float32_t src)
6400 {
6401 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6402  return riscv_dsp_exp_f32(src);
6403 #endif
6404 }
6405 
/* Only compiled when the toolchain defines __riscv_zfh. */
6406 #if defined (__riscv_zfh)
/**
 * @brief Scalar float16_t wrapper around riscv_dsp_exp_f16().
 *
 * Returns riscv_dsp_exp_f16(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src input value
 * @return value produced by the backend
 */
6412 static inline float16_t hpm_dsp_exp_f16(float16_t src)
6413 {
6414 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6415  return riscv_dsp_exp_f16(src);
6416 #endif
6417 }
6418 #endif
6419 
/**
 * @brief Scalar float32_t wrapper around riscv_dsp_sigmoid_f32().
 *
 * Returns riscv_dsp_sigmoid_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src input value
 * @return value produced by the backend
 */
6425 static inline float32_t hpm_dsp_sigmoid_f32(float32_t src)
6426 {
6427 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6428  return riscv_dsp_sigmoid_f32(src);
6429 #endif
6430 }
6431 
/* Only compiled when the toolchain defines __riscv_zfh. */
6432 #if defined (__riscv_zfh)
/**
 * @brief Scalar float16_t wrapper around riscv_dsp_sigmoid_f16().
 *
 * Returns riscv_dsp_sigmoid_f16(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src input value
 * @return value produced by the backend
 */
6438 static inline float16_t hpm_dsp_sigmoid_f16(float16_t src)
6439 {
6440 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6441  return riscv_dsp_sigmoid_f16(src);
6442 #endif
6443 }
6444 #endif
6445 
/**
 * @brief Scalar float32_t wrapper around riscv_dsp_log_f32().
 *
 * Returns riscv_dsp_log_f32(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src input value
 * @return value produced by the backend
 */
6451 static inline float32_t hpm_dsp_log_f32(float32_t src)
6452 {
6453 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6454  return riscv_dsp_log_f32(src);
6455 #endif
6456 }
6457 
/* Only compiled when the toolchain defines __riscv_zfh. */
6458 #if defined (__riscv_zfh)
/**
 * @brief Scalar float16_t wrapper around riscv_dsp_log_f16().
 *
 * Returns riscv_dsp_log_f16(src) when HPM_DSP_CORE == HPM_DSP_HW_NDS32.
 *
 * NOTE(review): for any other core selection the body is empty, so this
 * non-void function would fall off the end without returning a value.
 *
 * @param src input value
 * @return value produced by the backend
 */
6464 static inline float16_t hpm_dsp_log_f16(float16_t src)
6465 {
6466 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6467  return riscv_dsp_log_f16(src);
6468 #endif
6469 }
6470 #endif
6471 
6478 #endif
6479 #endif
6480 
6481 #ifdef HPM_MATH_DSP_SORT
6482 
6493 #ifdef HPM_EN_MATH_DSP_LIB
6494 #include "riscv_dsp_sort_math.h"
/**
 * @brief Initialisation wrapper for a float32_t sort instance.
 *
 * Passes (instance, alg, order) unchanged to riscv_dsp_sort_init_f32()
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty and
 * the instance is left untouched.
 *
 * @param instance sort instance structure handed to the backend
 * @param alg sort algorithm selector (riscv_dsp_sort_alg)
 * @param order sort order selector (riscv_dsp_sort_order)
 */
6517 static inline void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t * instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
6518 {
6519 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6520  riscv_dsp_sort_init_f32(instance, alg, order);
6521 #endif
6522 }
6523 
/**
 * @brief Sort wrapper for float32_t buffers.
 *
 * Passes (instance, src, dst, size) unchanged to riscv_dsp_sort_f32() when
 * HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param instance sort instance (read-only); presumably set up beforehand by the init wrapper (confirm)
 * @param src float32_t source buffer
 * @param dst float32_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6564 static inline void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t * instance,float32_t * src, float32_t * dst, uint32_t size)
6565 {
6566 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6567  riscv_dsp_sort_f32(instance, src, dst, size);
6568 #endif
6569 }
6570 
/**
 * @brief Initialisation wrapper for a float32_t merge-sort instance.
 *
 * Passes (instance, order, buf) unchanged to
 * riscv_dsp_sort_merge_init_f32() when HPM_DSP_CORE == HPM_DSP_HW_NDS32;
 * otherwise the body is empty and the instance is left untouched.
 *
 * @param instance merge-sort instance structure handed to the backend
 * @param order sort order selector (riscv_dsp_sort_order)
 * @param buf float32_t buffer handed to the backend; presumably working storage (confirm size requirements)
 */
6586 static inline void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t * instance, riscv_dsp_sort_order order, float32_t * buf)
6587 {
6588 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6589  riscv_dsp_sort_merge_init_f32(instance, order, buf);
6590 #endif
6591 }
6592 
/**
 * @brief Merge-sort wrapper for float32_t buffers.
 *
 * Passes (instance, src, dst, size) unchanged to riscv_dsp_sort_merge_f32()
 * when HPM_DSP_CORE == HPM_DSP_HW_NDS32; otherwise the body is empty.
 *
 * @param instance merge-sort instance (read-only); presumably set up beforehand by the init wrapper (confirm)
 * @param src float32_t source buffer
 * @param dst float32_t destination buffer
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6626 static inline void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t * instance, float32_t * src, float32_t * dst, uint32_t size)
6627 {
6628 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6629  riscv_dsp_sort_merge_f32(instance, src, dst, size);
6630 #endif
6631 }
6632 
6633 #endif
6634 #endif
6635 
6636 #ifdef HPM_MATH_NN_TINYENGINE
6637 #ifdef HPM_EN_MATH_DSP_LIB
6638 
6639 #include "riscv_math_types.h"
6640 #include <string.h>
6641 #include "riscv_simd_convert.h"
6642 
/*
 * Split a signed shift amount into its left and right components:
 * a positive value is a left shift, a non-positive value is a right shift
 * by its magnitude. The argument is fully parenthesized so that compound
 * expressions such as RIGHT_SHIFT(a - b) expand correctly; note that it is
 * still evaluated twice, so do not pass expressions with side effects.
 */
6643 #define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
6644 #define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
/* Largest and smallest q31_t bit patterns (0x7FFFFFFF and 0x80000000). */
6645 #define Q31_MAX ((q31_t)(0x7FFFFFFFL))
6646 #define Q31_MIN ((q31_t)(0x80000000L))
6647 
/**
 * @brief Store a packed pair of q15 values and advance the write pointer.
 *
 * The low 16 bits of @p value go to (*pQ15)[0], bits 31..16 go to
 * (*pQ15)[1], and *pQ15 is then moved forward by two elements.
 *
 * @param pQ15 address of the destination pointer; updated in place
 * @param value 32-bit word holding the two 16-bit halves
 */
6648 static inline void write_q15x2_ia(
6649  q15_t **pQ15,
6650  q31_t value)
6651 {
6652  q31_t val = value;
6653  (*pQ15)[0] = (val & 0x0FFFF);
6654  (*pQ15)[1] = (val >> 16) & 0x0FFFF;
6655  *pQ15 += 2;
6656 }
6657 
/**
 * @brief Load one 32-bit word from a q15 stream and advance the read pointer.
 *
 * Reads a q31_t from the address currently held in *in_q15, then moves
 * *in_q15 forward by two q15_t elements.
 *
 * NOTE(review): the load goes through a cast that drops const and
 * reinterprets q15_t storage as q31_t; this assumes the target tolerates
 * the resulting access (alignment/aliasing) -- confirm for the toolchain.
 *
 * @param in_q15 address of the source pointer; updated in place
 * @return the 32-bit word that was read
 */
6664 __STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
6665 {
6666  q31_t val;
6667 
6668  val = *(q31_t *)(*in_q15);
6669  *in_q15 += 2;
6670 
6671  return val;
6672 }
6673 
/**
 * @brief Rounded high part of the doubled product m1 * m2, with saturation.
 *
 * Computes (m1 * m2 + nudge) / 2^31 in 64-bit signed arithmetic, where
 * nudge is 2^30 when neither or both operands are negative and 1 - 2^30
 * when exactly one is negative. The single overflowing case,
 * m1 == m2 == Q31_MIN, returns Q31_MAX.
 *
 * @param m1 first multiplicand
 * @param m2 second multiplicand
 * @return rounded, saturated result
 */
6682 __STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
6683 {
6684  q31_t result = 0;
6685  q63_t mult = 1 << 30;
6686 
6687  if ((m1 < 0) ^ (m2 < 0)) {
6688  mult = 1 - mult;
6689  }
6690  mult = mult + (q63_t)m1 * m2;
 /* Keep the divisor signed and 64-bit: with an unsigned long divisor the
  * division turns unsigned wherever unsigned long is 64 bits wide, which
  * corrupts negative products. */
6691  result = (q31_t)(mult / ((q63_t)1 << 31));
6692 
6693  if ((m1 == m2) && (m1 == (int32_t)Q31_MIN)) {
6694  result = Q31_MAX;
6695  }
6696  return result;
6697 }
6698 
/**
 * @brief Divide by 2^exponent with rounding based on the discarded bits.
 *
 * The quotient is dividend >> exponent. The bits shifted out (dividend
 * masked with 2^exponent - 1) are compared against a threshold of
 * (mask >> 1), raised by one when the quotient is negative; if the
 * remainder is larger than the threshold the quotient is incremented.
 *
 * @param dividend value to divide
 * @param exponent shift amount; presumably expected to be in 0..31 (not checked here)
 * @return rounded quotient
 */
6707 __STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
6708 {
6709  q31_t result = 0;
6710 
 /* Mask selecting the low 'exponent' bits, i.e. the part shifted out. */
6711  const q31_t remainder_mask = (1l << exponent) - 1;
6712  int32_t remainder = remainder_mask & dividend;
6713 
6714  result = dividend >> exponent;
6715  q31_t threshold = remainder_mask >> 1;
6716  if (result < 0) {
6717  threshold++;
6718  }
6719  if (remainder > threshold) {
6720  result++;
6721  }
6722 
6723  return result;
6724 }
6725 
/**
 * @brief Rescale a value by a multiplier and a signed shift.
 *
 * val is first multiplied by 2^LEFT_SHIFT(shift), the product is passed
 * with @p multiplier through hpm_nn_sat_doubling_high_mult(), and the
 * result is divided by 2^RIGHT_SHIFT(shift) via
 * hpm_nn_divide_by_power_of_two(). A positive @p shift therefore acts as a
 * left shift and a non-positive one as a right shift by its magnitude.
 *
 * @param val value to rescale
 * @param multiplier multiplier handed to hpm_nn_sat_doubling_high_mult()
 * @param shift signed shift amount
 * @return rescaled value
 */
6726 __STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
6727 {
6728  return hpm_nn_divide_by_power_of_two(hpm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier),
6729  RIGHT_SHIFT(shift));
6730 }
6731 
/**
 * @brief Load one 32-bit word from a q7 stream and advance the read pointer.
 *
 * Reads a q31_t from the address currently held in *in_q7, then moves
 * *in_q7 forward by four q7_t elements.
 *
 * NOTE(review): the load goes through a cast that drops const and
 * reinterprets q7_t storage as q31_t; this assumes the target tolerates
 * the resulting access (alignment/aliasing) -- confirm for the toolchain.
 *
 * @param in_q7 address of the source pointer; updated in place
 * @return the 32-bit word that was read
 */
6737 __STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
6738 {
6739  q31_t val;
6740 
6741  val = *(q31_t *)(*in_q7);
6742  *in_q7 += 4;
6743 
6744  return val;
6745 }
6746 
/**
 * @brief Read four q7 values and expand them into two 32-bit words.
 *
 * One word is fetched with hpm_nn_read_q7x4_ia(); *out1 receives
 * __SXTB16(word) and *out2 receives __SXTB16_ROR(word, 8). No repacking
 * is done afterwards, so the outputs keep the order the intrinsics
 * produce (presumably bytes 0/2 in *out1 and bytes 1/3 in *out2 --
 * confirm against the intrinsic definitions).
 *
 * @param source current read position
 * @param out1 receives __SXTB16 of the word
 * @param out2 receives __SXTB16_ROR(word, 8)
 * @return read position advanced by four q7_t elements
 */
6751 __STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
6752 {
6753  q31_t inA = hpm_nn_read_q7x4_ia(&source);
6754 
6755  *out2 = __SXTB16_ROR(inA, 8);
6756  *out1 = __SXTB16(inA);
6757 
6758  return source;
6759 }
6760 
/**
 * @brief Read four q7 values, expand them and repack into two 32-bit words.
 *
 * One word is fetched with hpm_nn_read_q7x4_ia() and expanded with
 * __SXTB16_ROR(word, 8) and __SXTB16(word). The two intermediate words are
 * then recombined with __PKHBT (into *out1) and __PKHTB (into *out2),
 * presumably restoring the original element order as q15 pairs -- confirm
 * against the intrinsic definitions.
 *
 * @param source current read position
 * @param out1 receives __PKHBT(inAbuf2, inAbuf1, 16)
 * @param out2 receives __PKHTB(inAbuf1, inAbuf2, 16)
 * @return read position advanced by four q7_t elements
 */
6765 __STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
6766 {
6767  q31_t inA = hpm_nn_read_q7x4_ia(&source);
6768  q31_t inAbuf1 = __SXTB16_ROR(inA, 8);
6769  q31_t inAbuf2 = __SXTB16(inA);
6770 
6771  *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
6772  *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
6773 
6774  return source;
6775 }
6776 
/**
 * @brief Load one 32-bit word from an int8 stream and advance the read pointer.
 *
 * Reads an int32_t from the address currently held in *in_s8, then moves
 * *in_s8 forward by four int8_t elements.
 *
 * NOTE(review): the load goes through a cast that drops const and
 * reinterprets int8_t storage as int32_t; this assumes the target
 * tolerates the resulting access (alignment) -- confirm for the toolchain.
 *
 * @param in_s8 address of the source pointer; updated in place
 * @return the 32-bit word that was read
 */
6782 __STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
6783 {
6784  int32_t val;
6785 
6786  val = *(int32_t *)(*in_s8);
6787  *in_s8 += 4;
6788 
6789  return val;
6790 }
6791 
/**
 * @brief Widen int8 samples to int16 and add a constant offset.
 *
 * The input is handled four samples at a time using packed intrinsics
 * (block_size >> 2 iterations); the remaining block_size % 4 samples are
 * converted one by one as (int16_t)src[i] + offset. The packed path is
 * presumably equivalent to the scalar tail -- confirm against the
 * intrinsic definitions.
 *
 * @param src int8 input samples
 * @param dst int16 output buffer; receives block_size values
 * @param block_size number of samples to convert
 * @param offset value added to every widened sample
 */
6792 __STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
6793 {
6794  int32_t block_cnt;
6795 
6796  /* Run the below code for cores that support SIMD instructions */
6797  int32_t in_q7x4;
6798  int32_t in_q15x2_1;
6799  int32_t in_q15x2_2;
6800  int32_t out_q15x2_1;
6801  int32_t out_q15x2_2;
6802 
6803  /*loop unrolling */
6804  block_cnt = block_size >> 2;
6805 
6806  /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */
 /* The offset is duplicated into both 16-bit halves so one packed add covers two samples. */
6807  const int32_t offset_q15x2 = __PKHBT(offset, offset, 16);
6808  while (block_cnt > 0) {
6809  /* convert from s8 to s16 and then store the results in the destination buffer */
6810  in_q7x4 = hpm_nn_read_s8x4_ia(&src);
6811 
6812  /* Extract and sign extend each of the four s8 values to s16 */
6813  in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8));
6814  in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4);
6815 
 /* Recombine the two intermediate words before storing them as q15 pairs. */
6816  out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16);
6817  out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16);
6818 
6819  write_q15x2_ia(&dst, out_q15x2_1);
6820  write_q15x2_ia(&dst, out_q15x2_2);
6821 
6822  block_cnt--;
6823  }
6824  /* Handle left over samples */
6825  block_cnt = block_size % 0x4;
6826 
6827  while (block_cnt > 0) {
6828  *dst++ = (int16_t)*src++ + offset;
6829 
6830  /* Decrement the loop counter */
6831  block_cnt--;
6832  }
6833 }
6834 
6835 #endif
6836 #endif
6837 
6838 #ifdef HPM_MATH_NN_ACTIVATION
6839 #ifdef HPM_EN_MATH_NN_LIB
6840 #if defined(__zcc__)
6841 #include "tpt_nn_activation.h"
6842 #else
6843 #include "riscv_nn_activation.h"
6844 #endif
/**
 * @brief In-place activation wrapper for q7_t data.
 *
 * Forwards all arguments in declaration order to tpt_nn_activate_s8()
 * under __zcc__, and to riscv_nn_activate_s8() otherwise.
 *
 * @param in_out q7_t buffer handed to the backend
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 * @param int_bits forwarded as-is; meaning defined by the backend
 * @param act_fun activation selector (riscv_nn_activation_fun)
 */
6876 static inline void hpm_nn_activate_s8(q7_t *in_out,
6877  uint32_t size,
6878  uint16_t int_bits,
6879  riscv_nn_activation_fun act_fun)
6880 {
6881 #if defined(__zcc__)
6882  tpt_nn_activate_s8(in_out, size, int_bits, act_fun);
6883 #else
6884  riscv_nn_activate_s8(in_out, size, int_bits, act_fun);
6885 #endif
6886 }
6887 
/**
 * @brief In-place activation wrapper for q15_t data.
 *
 * Forwards all arguments in declaration order to tpt_nn_activate_s16()
 * under __zcc__, and to riscv_nn_activate_s16() otherwise.
 *
 * @param in_out q15_t buffer handed to the backend
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 * @param int_bits forwarded as-is; meaning defined by the backend
 * @param act_fun activation selector (riscv_nn_activation_fun)
 */
6903 static inline void hpm_nn_activate_s16(q15_t *in_out,
6904  uint32_t size,
6905  uint16_t int_bits,
6906  riscv_nn_activation_fun act_fun)
6907 {
6908 #if defined(__zcc__)
6909  tpt_nn_activate_s16(in_out, size, int_bits, act_fun);
6910 #else
6911  riscv_nn_activate_s16(in_out, size, int_bits, act_fun);
6912 #endif
6913 }
6914 
/**
 * @brief In-place leaky-ReLU wrapper for q7_t data.
 *
 * Under __zcc__ this calls tpt_nn_leaky_relu_q7(), passing @p in_out for
 * both of its first two arguments; on other toolchains it calls
 * riscv_nn_leaky_relu_s8(in_out, size, slope).
 *
 * @param in_out q7_t buffer handed to the backend
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 * @param slope forwarded as-is; meaning defined by the backend
 */
6932 static inline void hpm_nn_leaky_relu_s8(q7_t *in_out,
6933  uint32_t size,
6934  q15_t slope)
{
6935 #if defined(__zcc__)
6936  tpt_nn_leaky_relu_q7(in_out, in_out, size, slope);
6937 #else
6938  riscv_nn_leaky_relu_s8(in_out, size, slope);
6939 #endif
6940 }
6941 
/**
 * @brief In-place bounded-ReLU wrapper for q7_t data.
 *
 * Forwards (data, size, max_val) to tpt_nn_relu_any_q7() under __zcc__,
 * and to riscv_nn_relu_any_s8() otherwise.
 *
 * @param data q7_t buffer handed to the backend
 * @param size forwarded as-is (16-bit here); presumably the element count (confirm in the backend header)
 * @param max_val forwarded as-is; presumably the upper clamp value (confirm)
 */
6949 static inline void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
6950 {
6951 #if defined(__zcc__)
6952  tpt_nn_relu_any_q7(data, size, max_val);
6953 #else
6954  riscv_nn_relu_any_s8(data, size, max_val);
6955 #endif
6956 }
6957 
/**
 * @brief In-place ReLU wrapper for q7_t data.
 *
 * Forwards (in_out, size) to tpt_nn_relu_q7() under __zcc__, and to
 * riscv_nn_relu_s8() otherwise.
 *
 * @param in_out q7_t buffer handed to the backend
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6974 static inline void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
6975 {
6976 #if defined(__zcc__)
6977  tpt_nn_relu_q7(in_out, size);
6978 #else
6979  riscv_nn_relu_s8(in_out, size);
6980 #endif
6981 }
6982 
/**
 * @brief In-place ReLU wrapper for q15_t data.
 *
 * Forwards (in_out, size) to tpt_nn_relu_q15() under __zcc__, and to
 * riscv_nn_relu_s16() otherwise.
 *
 * @param in_out q15_t buffer handed to the backend
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 */
6989 static inline void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
6990 {
6991 #if defined(__zcc__)
6992  tpt_nn_relu_q15(in_out, size);
6993 #else
6994  riscv_nn_relu_s16(in_out, size);
6995 #endif
6996 }
6997 
6998 #ifdef __riscv_zfh
/**
 * @brief Sigmoid wrapper for float16_t vectors.
 *
 * Forwards (in_vec, size, out_vec) to tpt_nn_sigmoid_f16() under __zcc__,
 * and to riscv_nn_sigmoid_f16() otherwise, returning the backend's result.
 *
 * @param in_vec float16_t input vector (read-only)
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 * @param out_vec float16_t output vector
 * @return value returned by the backend; its meaning is defined there
 */
7008 static inline int32_t hpm_nn_sigmoid_f16(const float16_t *in_vec,
7009  uint32_t size,
7010  float16_t *out_vec)
7011 {
7012 #if defined(__zcc__)
7013  return tpt_nn_sigmoid_f16(in_vec, size, out_vec);
7014 #else
7015  return riscv_nn_sigmoid_f16(in_vec, size, out_vec);
7016 #endif
7017 }
7018 
/**
 * @brief Tanh wrapper for float16_t vectors.
 *
 * Forwards (in_vec, size, out_vec) to tpt_nn_tanh_f16() under __zcc__,
 * and to riscv_nn_tanh_f16() otherwise, returning the backend's result.
 *
 * @param in_vec float16_t input vector (read-only)
 * @param size forwarded as-is; presumably the element count (confirm in the backend header)
 * @param out_vec float16_t output vector
 * @return value returned by the backend; its meaning is defined there
 */
7027 static inline int32_t hpm_nn_tanh_f16(const float16_t *in_vec,
7028  uint32_t size,
7029  float16_t *out_vec)
7030 {
7031 #if defined(__zcc__)
7032  return tpt_nn_tanh_f16(in_vec, size, out_vec);
7033 #else
7034  return riscv_nn_tanh_f16(in_vec, size, out_vec);
7035 #endif
7036 }
7037 #endif
7038 
7042 #endif
7043 #endif
7044 
7045 #ifdef HPM_MATH_NN_BASIC
7046 #ifdef HPM_EN_MATH_NN_LIB
7047 #if defined(__zcc__)
7048 #include "tpt_nn_basic.h"
7049 #else
7050 #include "riscv_nn_basic.h"
7051 #endif
/**
 * @brief Element-wise add wrapper for q7_t tensors (symmetric variant).
 *
 * Forwards all nine arguments in declaration order to tpt_nn_add_s8_sym()
 * under __zcc__, and to riscv_nn_add_s8_sym() otherwise. Scaling and
 * shift semantics are defined by the backend.
 *
 * @param in_tensor1 first q7_t input (read-only)
 * @param in_tensor2 second q7_t input (read-only)
 * @param scale1 pointer to int16_t scale data for the first input
 * @param scale2 pointer to int16_t scale data for the second input
 * @param size forwarded as-is; presumably the element count (confirm)
 * @param pre_rshift forwarded as-is
 * @param out_scale forwarded as-is
 * @param post_rshift forwarded as-is
 * @param out q7_t output buffer
 */
7097 static inline void hpm_nn_add_s8_sym(const q7_t *in_tensor1,
7098  const q7_t *in_tensor2,
7099  const int16_t *scale1,
7100  const int16_t *scale2,
7101  const uint32_t size,
7102  const uint16_t pre_rshift,
7103  const uint16_t out_scale,
7104  const uint16_t post_rshift,
7105  q7_t *out)
7106 {
7107 #if defined(__zcc__)
7108  tpt_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, size, pre_rshift,
7109  out_scale, post_rshift, out);
7110 #else
7111  riscv_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, size, pre_rshift,
7112  out_scale, post_rshift, out);
7113 #endif
7114 }
7115 
/**
 * @brief Element-wise add wrapper for q7_t tensors (symmetric, rounding variant).
 *
 * Forwards all nine arguments in declaration order to
 * tpt_nn_add_s8_sym_round() under __zcc__, and to
 * riscv_nn_add_s8_sym_round() otherwise. Here the two scales are passed by
 * value as uint32_t. Scaling and shift semantics are defined by the
 * backend.
 *
 * @param in_tensor1 first q7_t input (read-only)
 * @param in_tensor2 second q7_t input (read-only)
 * @param scale1 scale for the first input
 * @param scale2 scale for the second input
 * @param size forwarded as-is; presumably the element count (confirm)
 * @param pre_rshift forwarded as-is
 * @param out_scale forwarded as-is
 * @param post_rshift forwarded as-is
 * @param out q7_t output buffer
 */
7135 static inline void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1,
7136  const q7_t *in_tensor2,
7137  const uint32_t scale1,
7138  const uint32_t scale2,
7139  const uint32_t size,
7140  const uint16_t pre_rshift,
7141  const uint16_t out_scale,
7142  const uint16_t post_rshift,
7143  q7_t *out)
7144 {
7145 #if defined(__zcc__)
7146  tpt_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2, size,
7147  pre_rshift, out_scale, post_rshift, out);
7148 #else
7149  riscv_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2, size,
7150  pre_rshift, out_scale, post_rshift, out);
7151 #endif
7152 }
7153 
/**
 * @brief Element-wise add wrapper for int8 tensors (asymmetric variant).
 *
 * Forwards all sixteen arguments in declaration order to
 * tpt_nn_ew_add_s8_asym() under __zcc__, and to riscv_nn_ew_add_s8_asym()
 * otherwise, returning the backend's result. Offset, scale, shift and
 * activation-range semantics are defined by the backend.
 *
 * @param in_tensor1 first int8 input (read-only)
 * @param in_tensor2 second int8 input (read-only)
 * @param in_offset1,in_scale1,in_rshift1 quantization arguments for the first input, forwarded as-is
 * @param in_offset2,in_scale2,in_rshift2 quantization arguments for the second input, forwarded as-is
 * @param lshift forwarded as-is
 * @param out int8 output buffer
 * @param out_offset,out_scale,out_rshift quantization arguments for the output, forwarded as-is
 * @param act_min,act_max forwarded as-is; presumably the output clamp range (confirm)
 * @param size forwarded as-is; presumably the element count (confirm)
 * @return value returned by the backend; its meaning is defined there
 */
7205 static inline int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1,
7206  const int8_t *in_tensor2,
7207  const int32_t in_offset1,
7208  const int32_t in_scale1,
7209  const int32_t in_rshift1,
7210  const int32_t in_offset2,
7211  const int32_t in_scale2,
7212  const int32_t in_rshift2,
7213  const int32_t lshift,
7214  int8_t *out,
7215  const int32_t out_offset,
7216  const int32_t out_scale,
7217  const int32_t out_rshift,
7218  const int32_t act_min,
7219  const int32_t act_max,
7220  const uint32_t size)
7221 {
7222 #if defined(__zcc__)
7223  return tpt_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7224  in_rshift1, in_offset2, in_scale2, in_rshift2,
7225  lshift, out, out_offset, out_scale, out_rshift,
7226  act_min, act_max, size);
7227 #else
7228  return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7229  in_rshift1, in_offset2, in_scale2, in_rshift2,
7230  lshift, out, out_offset, out_scale, out_rshift,
7231  act_min, act_max, size);
7232 #endif
7233 }
7234 
/**
 * @brief Element-wise multiply wrapper for int8 tensors (asymmetric variant).
 *
 * Forwards all eleven arguments in declaration order to
 * tpt_nn_ew_mul_s8_asym() under __zcc__, and to riscv_nn_ew_mul_s8_asym()
 * otherwise, returning the backend's result. Offset, scale, shift and
 * activation-range semantics are defined by the backend.
 *
 * @param in_tensor1 first int8 input (read-only)
 * @param in_tensor2 second int8 input (read-only)
 * @param in_offset1 forwarded as-is
 * @param in_offset2 forwarded as-is
 * @param out int8 output buffer
 * @param out_offset,out_scale,out_shift quantization arguments for the output, forwarded as-is
 * @param act_min,act_max forwarded as-is; presumably the output clamp range (confirm)
 * @param size forwarded as-is; presumably the element count (confirm)
 * @return value returned by the backend; its meaning is defined there
 */
7274 static inline int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1,
7275  const int8_t *in_tensor2,
7276  const int32_t in_offset1,
7277  const int32_t in_offset2,
7278  int8_t *out,
7279  const int32_t out_offset,
7280  const int32_t out_scale,
7281  const int32_t out_shift,
7282  const int32_t act_min,
7283  const int32_t act_max,
7284  const uint32_t size)
7285 {
7286 #if defined(__zcc__)
7287  return tpt_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2,
7288  out, out_offset, out_scale, out_shift, act_min,
7289  act_max, size);
7290 #else
7291  return riscv_nn_ew_mul_s8_asym(in_tensor1, in_tensor2, in_offset1, in_offset2,
7292  out, out_offset, out_scale, out_shift, act_min,
7293  act_max, size);
7294 #endif
7295 }
7296 
7301 #endif
7302 
7303 #ifdef HPM_EN_MATH_NN_RVP32_LIB
7304 #if defined(__zcc__)
7305 #include "tpt_nn_basic.h"
7306 #else
7307 #include "riscv_nn_basic.h"
7308 #endif
7309 
/**
 * @brief Element-wise add wrapper for int8 tensors (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * Under __zcc__ this calls tpt_elementwise_add_s8(), whose argument order
 * differs from this wrapper: the output arguments come first and
 * @p out_rshift is passed negated. On other toolchains it forwards all
 * arguments in declaration order to riscv_nn_ew_add_s8_asym().
 *
 * NOTE(review): this has the same name as the wrapper in the
 * HPM_EN_MATH_NN_LIB section, so the two configuration macros are
 * presumably never defined together -- confirm.
 *
 * @param in_tensor1 first int8 input (read-only)
 * @param in_tensor2 second int8 input (read-only)
 * @param in_offset1,in_scale1,in_rshift1 quantization arguments for the first input, forwarded as-is
 * @param in_offset2,in_scale2,in_rshift2 quantization arguments for the second input, forwarded as-is
 * @param lshift forwarded as-is
 * @param out int8 output buffer
 * @param out_offset,out_scale quantization arguments for the output, forwarded as-is
 * @param out_rshift forwarded as-is on the riscv path, negated on the __zcc__ path
 * @param act_min,act_max forwarded as-is; presumably the output clamp range (confirm)
 * @param size forwarded as-is; presumably the element count (confirm)
 * @return value returned by the backend; its meaning is defined there
 */
7362 static inline int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1,
7363  const int8_t *in_tensor2,
7364  const int32_t in_offset1,
7365  const int32_t in_scale1,
7366  const int32_t in_rshift1,
7367  const int32_t in_offset2,
7368  const int32_t in_scale2,
7369  const int32_t in_rshift2,
7370  const int32_t lshift,
7371  int8_t *out,
7372  const int32_t out_offset,
7373  const int32_t out_scale,
7374  const int32_t out_rshift,
7375  const int32_t act_min,
7376  const int32_t act_max,
7377  const uint32_t size)
7378 {
7379 #if defined(__zcc__)
7380  return tpt_elementwise_add_s8(out, out_offset, out_scale, -out_rshift, act_min,
7381  act_max, in_tensor1, in_tensor2, in_offset1, in_scale1,
7382  in_rshift1, in_offset2, in_scale2, in_rshift2,
7383  lshift, size);
7384 #else
7385  return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2, in_offset1, in_scale1,
7386  in_rshift1, in_offset2, in_scale2, in_rshift2,
7387  lshift, out, out_offset, out_scale, out_rshift,
7388  act_min, act_max, size);
7389 #endif
7390 }
7391 
7392 #endif
7393 
7394 #endif
7395 
7396 #ifdef HPM_MATH_NN_CONCATENATION
7397 #ifdef HPM_EN_MATH_NN_LIB
7398 #if defined(__zcc__)
7399 #include "tpt_nn_concatenation.h"
7400 #else
7401 #include "riscv_nn_concatenation.h"
7402 #endif
7403 
/**
 * @brief Concatenation wrapper for int8 tensors along the w dimension.
 *
 * Under __zcc__ this calls tpt_concatenation_s8_w(), which takes the
 * output tensor first; on other toolchains it forwards the arguments in
 * declaration order to riscv_nn_concate_s8_w().
 *
 * @param in_tensor int8 input tensor (read-only)
 * @param in_tensor_x,in_tensor_y,in_tensor_z,in_tensor_w input tensor dimensions, forwarded as-is
 * @param out_tensor int8 output tensor
 * @param out_offset_w forwarded as-is; presumably the write offset along w in the output (confirm)
 */
7429 static inline void hpm_nn_concate_s8_w(const int8_t *in_tensor,
7430  const uint16_t in_tensor_x,
7431  const uint16_t in_tensor_y,
7432  const uint16_t in_tensor_z,
7433  const uint16_t in_tensor_w,
7434  int8_t *out_tensor,
7435  const uint32_t out_offset_w)
7436 {
7437 #if defined(__zcc__)
7438  tpt_concatenation_s8_w(out_tensor, in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7439  in_tensor_w, out_offset_w);
7440 #else
7441  riscv_nn_concate_s8_w(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7442  in_tensor_w, out_tensor, out_offset_w);
7443 #endif
7444 }
7445 
/**
 * @brief Concatenation wrapper for int8 tensors along the x dimension.
 *
 * Forwards all eight arguments in declaration order to
 * tpt_nn_concate_s8_x() under __zcc__, and to riscv_nn_concate_s8_x()
 * otherwise.
 *
 * @param in_tensor int8 input tensor (read-only)
 * @param in_tensor_x,in_tensor_y,in_tensor_z,in_tensor_w input tensor dimensions, forwarded as-is
 * @param out_tensor int8 output tensor
 * @param out_tensor_x forwarded as-is; presumably the output size along x (confirm)
 * @param out_offset_x forwarded as-is; presumably the write offset along x in the output (confirm)
 */
7464 static inline void hpm_nn_concate_s8_x(const int8_t *in_tensor,
7465  const uint16_t in_tensor_x,
7466  const uint16_t in_tensor_y,
7467  const uint16_t in_tensor_z,
7468  const uint16_t in_tensor_w,
7469  int8_t *out_tensor,
7470  const uint16_t out_tensor_x,
7471  const uint32_t out_offset_x)
7472 {
7473 #if defined(__zcc__)
7474  tpt_nn_concate_s8_x(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7475  in_tensor_w, out_tensor, out_tensor_x, out_offset_x);
7476 #else
7477  riscv_nn_concate_s8_x(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7478  in_tensor_w, out_tensor, out_tensor_x, out_offset_x);
7479 #endif
7480 }
7481 
/**
 * @brief Concatenation wrapper for int8 tensors along the y dimension.
 *
 * Forwards all eight arguments in declaration order to
 * tpt_nn_concate_s8_y() under __zcc__, and to riscv_nn_concate_s8_y()
 * otherwise.
 *
 * @param in_tensor int8 input tensor (read-only)
 * @param in_tensor_x,in_tensor_y,in_tensor_z,in_tensor_w input tensor dimensions, forwarded as-is
 * @param out_tensor int8 output tensor
 * @param out_tensor_y forwarded as-is; presumably the output size along y (confirm)
 * @param out_offset_y forwarded as-is; presumably the write offset along y in the output (confirm)
 */
7499 static inline void hpm_nn_concate_s8_y(const int8_t *in_tensor,
7500  const uint16_t in_tensor_x,
7501  const uint16_t in_tensor_y,
7502  const uint16_t in_tensor_z,
7503  const uint16_t in_tensor_w,
7504  int8_t *out_tensor,
7505  const uint16_t out_tensor_y,
7506  const uint32_t out_offset_y)
7507 {
7508 #if defined(__zcc__)
7509  tpt_nn_concate_s8_y(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7510  in_tensor_w, out_tensor, out_tensor_y, out_offset_y);
7511 #else
7512  riscv_nn_concate_s8_y(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7513  in_tensor_w, out_tensor, out_tensor_y, out_offset_y);
7514 #endif
7515 }
7516 
/**
 * @brief Concatenation wrapper for int8 tensors along the z dimension.
 *
 * Forwards all eight arguments in declaration order to
 * tpt_nn_concate_s8_z() under __zcc__, and to riscv_nn_concate_s8_z()
 * otherwise.
 *
 * @param in_tensor int8 input tensor (read-only)
 * @param in_tensor_x,in_tensor_y,in_tensor_z,in_tensor_w input tensor dimensions, forwarded as-is
 * @param out_tensor int8 output tensor
 * @param out_tensor_z forwarded as-is; presumably the output size along z (confirm)
 * @param out_offset_z forwarded as-is; presumably the write offset along z in the output (confirm)
 */
7534 static inline void hpm_nn_concate_s8_z(const int8_t *in_tensor,
7535  const uint16_t in_tensor_x,
7536  const uint16_t in_tensor_y,
7537  const uint16_t in_tensor_z,
7538  const uint16_t in_tensor_w,
7539  int8_t *out_tensor,
7540  const uint16_t out_tensor_z,
7541  const uint32_t out_offset_z)
7542 {
7543 #if defined(__zcc__)
7544  tpt_nn_concate_s8_z(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7545  in_tensor_w, out_tensor, out_tensor_z, out_offset_z);
7546 #else
7547  riscv_nn_concate_s8_z(in_tensor, in_tensor_x, in_tensor_y, in_tensor_z,
7548  in_tensor_w, out_tensor, out_tensor_z, out_offset_z);
7549 #endif
7550 }
7551 
7556 #endif
7557 #endif
7558 
7559 #ifdef HPM_MATH_NN_CONVOLUTION
7560 #ifdef HPM_EN_MATH_NN_LIB
7561 #if defined(__zcc__)
7562 #include "tpt_nn_convolution.h"
7563 #else
7564 #include "riscv_nn_convolution.h"
7565 #endif
7566 
/**
 * @brief Wrapper for the 1x1 HWC q7 convolution backend (shift-based bias/output scaling, non-square tensors).
 *
 * Forwards all twenty arguments in declaration order to
 * tpt_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any() under __zcc__, and to
 * riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any() otherwise. Any
 * constraints on dimensions, padding, stride and scratch-buffer sizes are
 * imposed by the backend and are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim_x,in_tensor_dim_y,in_tensor_ch input dimensions and channel count
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim_x,ker_dim_y kernel dimensions
 * @param pad_x,pad_y padding
 * @param stride_x,stride_y stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim_x,out_tensor_dim_y output dimensions
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
7654 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor,
7655  const uint16_t in_tensor_dim_x,
7656  const uint16_t in_tensor_dim_y,
7657  const uint16_t in_tensor_ch,
7658  const q7_t *ker_weight,
7659  const uint16_t out_tensor_ch,
7660  const uint16_t ker_dim_x,
7661  const uint16_t ker_dim_y,
7662  const uint16_t pad_x,
7663  const uint16_t pad_y,
7664  const uint16_t stride_x,
7665  const uint16_t stride_y,
7666  const q7_t *bias,
7667  const uint16_t bias_lshift,
7668  const uint16_t out_rshift,
7669  q7_t *out_tensor,
7670  const uint16_t out_tensor_dim_x,
7671  const uint16_t out_tensor_dim_y,
7672  q15_t *in_tmp_buf,
7673  q7_t *tmp_buf)
7674 {
7675 #if defined(__zcc__)
7676  return tpt_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7677  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7678  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7679  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
7680  out_tensor_dim_y, in_tmp_buf, tmp_buf);
7681 #else
7682  return riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7683  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7684  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7685  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
7686  out_tensor_dim_y, in_tmp_buf, tmp_buf);
7687 #endif
7688 }
7689 
/**
 * @brief Wrapper for the HWC q7 RGB convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fourteen arguments in declaration order to
 * tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias() under __zcc__, and to
 * riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias() otherwise. There is no input
 * channel-count parameter; the channel count is presumably fixed by the
 * backend (confirm). Scratch-buffer sizes are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
7741 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor,
7742  const uint16_t in_tensor_dim,
7743  const q7_t *ker_weight,
7744  const uint16_t out_tensor_ch,
7745  const uint16_t ker_dim,
7746  const uint16_t pad,
7747  const uint16_t stride,
7748  const q7_t *bias,
7749  const uint16_t bias_lshift,
7750  const uint16_t out_rshift,
7751  q7_t *out_tensor,
7752  const uint16_t out_tensor_dim,
7753  q15_t *in_tmp_buf,
7754  q7_t *tmp_buf)
7755 {
7756 #if defined(__zcc__)
7757  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7758  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7759  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7760  tmp_buf);
7761 #else
7762  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7763  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7764  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7765  tmp_buf);
7766 #endif
7767 }
7768 
/**
 * @brief Wrapper for the "fast" HWC q7 RGB convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fourteen arguments in declaration order to
 * tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast() under __zcc__, and to
 * riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast() otherwise. The second
 * scratch buffer here is q15_t (wt_tmp_buf). Scratch-buffer sizes are not
 * checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param wt_tmp_buf q15_t scratch buffer handed to the backend; presumably for weights (confirm)
 * @return value returned by the backend; its meaning is defined there
 */
7820 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor,
7821  const uint16_t in_tensor_dim,
7822  const q7_t *ker_weight,
7823  const uint16_t out_tensor_ch,
7824  const uint16_t ker_dim,
7825  const uint16_t pad,
7826  const uint16_t stride,
7827  const q7_t *bias,
7828  const uint16_t bias_lshift,
7829  const uint16_t out_rshift,
7830  q7_t *out_tensor,
7831  const uint16_t out_tensor_dim,
7832  q15_t *in_tmp_buf,
7833  q15_t *wt_tmp_buf)
7834 {
7835 #if defined(__zcc__)
7836  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7837  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7838  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7839  wt_tmp_buf);
7840 #else
7841  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7842  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7843  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7844  wt_tmp_buf);
7845 #endif
7846 }
7847 
7848 
/**
 * @brief Wrapper for the HWC q7 convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fifteen arguments in declaration order to
 * tpt_nn_conv_HWC_s8_s8_s8_sft_bias() under __zcc__, and to
 * riscv_nn_conv_HWC_s8_s8_s8_sft_bias() otherwise. Scratch-buffer sizes
 * and any dimension constraints are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param in_tensor_ch input channel count
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
7899 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor,
7900  const uint16_t in_tensor_dim,
7901  const uint16_t in_tensor_ch,
7902  const q7_t *ker_weight,
7903  const uint16_t out_tensor_ch,
7904  const uint16_t ker_dim,
7905  const uint16_t pad,
7906  const uint16_t stride,
7907  const q7_t *bias,
7908  const uint16_t bias_lshift,
7909  const uint16_t out_rshift,
7910  q7_t *out_tensor,
7911  const uint16_t out_tensor_dim,
7912  q15_t *in_tmp_buf,
7913  q7_t *tmp_buf)
7914 {
7915 #if defined(__zcc__)
7916  return tpt_nn_conv_HWC_s8_s8_s8_sft_bias(
7917  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7918  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
7919  out_tensor_dim, in_tmp_buf, tmp_buf);
7920 #else
7921  return riscv_nn_conv_HWC_s8_s8_s8_sft_bias(
7922  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7923  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
7924  out_tensor_dim, in_tmp_buf, tmp_buf);
7925 #endif
7926 }
7927 
/**
 * @brief Wrapper for the HWC q7 convolution backend (shift-based scaling, non-square tensors).
 *
 * Forwards all twenty arguments in declaration order to
 * tpt_nn_conv_HWC_s8_s8_s8_sft_bias_any() under __zcc__, and to
 * riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any() otherwise. This wrapper is
 * declared void, so no status is reported to the caller. Scratch-buffer
 * sizes and any dimension constraints are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim_x,in_tensor_dim_y,in_tensor_ch input dimensions and channel count
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim_x,ker_dim_y kernel dimensions
 * @param pad_x,pad_y padding
 * @param stride_x,stride_y stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim_x,out_tensor_dim_y output dimensions
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 */
7989 static inline void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor,
7990  const uint16_t in_tensor_dim_x,
7991  const uint16_t in_tensor_dim_y,
7992  const uint16_t in_tensor_ch,
7993  const q7_t *ker_weight,
7994  const uint16_t out_tensor_ch,
7995  const uint16_t ker_dim_x,
7996  const uint16_t ker_dim_y,
7997  const uint16_t pad_x,
7998  const uint16_t pad_y,
7999  const uint16_t stride_x,
8000  const uint16_t stride_y,
8001  const q7_t *bias,
8002  const uint16_t bias_lshift,
8003  const uint16_t out_rshift,
8004  q7_t *out_tensor,
8005  const uint16_t out_tensor_dim_x,
8006  const uint16_t out_tensor_dim_y,
8007  q15_t *in_tmp_buf,
8008  q7_t *tmp_buf)
8009 {
8010 #if defined(__zcc__)
8011  tpt_nn_conv_HWC_s8_s8_s8_sft_bias_any(
8012  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8013  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8014  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8015  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8016 #else
8017  riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(
8018  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8019  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8020  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8021  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8022 #endif
8023 }
8024 
/**
 * @brief Wrapper for the "fast" HWC q7 convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fifteen arguments in declaration order to
 * tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast() under __zcc__, and to
 * riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast() otherwise. Any constraints
 * the fast variant places on channel counts or buffer sizes come from the
 * backend and are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param in_tensor_ch input channel count
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
8077 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor,
8078  const uint16_t in_tensor_dim,
8079  const uint16_t in_tensor_ch,
8080  const q7_t *ker_weight,
8081  const uint16_t out_tensor_ch,
8082  const uint16_t ker_dim,
8083  const uint16_t pad,
8084  const uint16_t stride,
8085  const q7_t *bias,
8086  const uint16_t bias_lshift,
8087  const uint16_t out_rshift,
8088  q7_t *out_tensor,
8089  const uint16_t out_tensor_dim,
8090  q15_t *in_tmp_buf,
8091  q7_t *tmp_buf)
8092 {
8093 #if defined(__zcc__)
8094  return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8095  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8096  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8097  out_tensor_dim, in_tmp_buf, tmp_buf);
8098 #else
8099  return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8100  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8101  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8102  out_tensor_dim, in_tmp_buf, tmp_buf);
8103 #endif
8104 }
8105 
/**
 * @brief Wrapper for the "fast" HWC q7 convolution backend (shift-based scaling, non-square tensors).
 *
 * Forwards all twenty arguments in declaration order to
 * tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any() under __zcc__, and to
 * riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any() otherwise. Any
 * constraints the fast variant places on channel counts or buffer sizes
 * come from the backend and are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim_x,in_tensor_dim_y,in_tensor_ch input dimensions and channel count
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim_x,ker_dim_y kernel dimensions
 * @param pad_x,pad_y padding
 * @param stride_x,stride_y stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim_x,out_tensor_dim_y output dimensions
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
8172 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor,
8173  const uint16_t in_tensor_dim_x,
8174  const uint16_t in_tensor_dim_y,
8175  const uint16_t in_tensor_ch,
8176  const q7_t *ker_weight,
8177  const uint16_t out_tensor_ch,
8178  const uint16_t ker_dim_x,
8179  const uint16_t ker_dim_y,
8180  const uint16_t pad_x,
8181  const uint16_t pad_y,
8182  const uint16_t stride_x,
8183  const uint16_t stride_y,
8184  const q7_t *bias,
8185  const uint16_t bias_lshift,
8186  const uint16_t out_rshift,
8187  q7_t *out_tensor,
8188  const uint16_t out_tensor_dim_x,
8189  const uint16_t out_tensor_dim_y,
8190  q15_t *in_tmp_buf,
8191  q7_t *tmp_buf)
8192 {
8193 #if defined(__zcc__)
8194  return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8195  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8196  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8197  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8198  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8199 #else
8200  return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8201  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8202  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8203  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8204  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8205 #endif
8206 }
8207 
8208 
/**
 * @brief Wrapper for the HWC q15 convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fifteen arguments in declaration order to
 * tpt_nn_conv_HWC_s16_s16_s16_sft_bias() under __zcc__, and to
 * riscv_nn_conv_HWC_s16_s16_s16_sft_bias() otherwise. Input, weights,
 * bias and output are q15_t. Scratch-buffer sizes are not checked here.
 *
 * @param in_tensor q15_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param in_tensor_ch input channel count
 * @param ker_weight q15_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q15_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q15_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
8259 static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor,
8260  const uint16_t in_tensor_dim,
8261  const uint16_t in_tensor_ch,
8262  const q15_t *ker_weight,
8263  const uint16_t out_tensor_ch,
8264  const uint16_t ker_dim,
8265  const uint16_t pad,
8266  const uint16_t stride,
8267  const q15_t *bias,
8268  const uint16_t bias_lshift,
8269  const uint16_t out_rshift,
8270  q15_t *out_tensor,
8271  const uint16_t out_tensor_dim,
8272  q15_t *in_tmp_buf,
8273  q7_t *tmp_buf)
8274 {
8275 #if defined(__zcc__)
8276  return tpt_nn_conv_HWC_s16_s16_s16_sft_bias(
8277  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8278  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8279  out_tensor_dim, in_tmp_buf, tmp_buf);
8280 #else
8281  return riscv_nn_conv_HWC_s16_s16_s16_sft_bias(
8282  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8283  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8284  out_tensor_dim, in_tmp_buf, tmp_buf);
8285 #endif
8286 }
8287 
/**
 * @brief Wrapper for the "fast" HWC q15 convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fifteen arguments in declaration order to
 * tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast() under __zcc__, and to
 * riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast() otherwise. Any
 * constraints the fast variant places on channel counts or buffer sizes
 * come from the backend and are not checked here.
 *
 * @param in_tensor q15_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param in_tensor_ch input channel count
 * @param ker_weight q15_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q15_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q15_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
8340 static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor,
8341  const uint16_t in_tensor_dim,
8342  const uint16_t in_tensor_ch,
8343  const q15_t *ker_weight,
8344  const uint16_t out_tensor_ch,
8345  const uint16_t ker_dim,
8346  const uint16_t pad,
8347  const uint16_t stride,
8348  const q15_t *bias,
8349  const uint16_t bias_lshift,
8350  const uint16_t out_rshift,
8351  q15_t *out_tensor,
8352  const uint16_t out_tensor_dim,
8353  q15_t *in_tmp_buf,
8354  q7_t *tmp_buf)
8355 {
8356 #if defined(__zcc__)
8357  return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8358  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8359  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8360  out_tensor_dim, in_tmp_buf, tmp_buf);
8361 #else
8362  return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8363  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8364  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8365  out_tensor_dim, in_tmp_buf, tmp_buf);
8366 #endif
8367 }
8368 
/**
 * @brief Wrapper for the "fast" HWC q15 convolution backend (shift-based scaling, non-square tensors).
 *
 * Forwards all twenty arguments in declaration order to
 * tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any() under __zcc__, and to
 * riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any() otherwise. Any
 * constraints the fast variant places on channel counts or buffer sizes
 * come from the backend and are not checked here.
 *
 * @param in_tensor q15_t input tensor (read-only)
 * @param in_tensor_dim_x,in_tensor_dim_y,in_tensor_ch input dimensions and channel count
 * @param ker_weight q15_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim_x,ker_dim_y kernel dimensions
 * @param pad_x,pad_y padding
 * @param stride_x,stride_y stride
 * @param bias q15_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q15_t output tensor
 * @param out_tensor_dim_x,out_tensor_dim_y output dimensions
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
8435 static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor,
8436  const uint16_t in_tensor_dim_x,
8437  const uint16_t in_tensor_dim_y,
8438  const uint16_t in_tensor_ch,
8439  const q15_t *ker_weight,
8440  const uint16_t out_tensor_ch,
8441  const uint16_t ker_dim_x,
8442  const uint16_t ker_dim_y,
8443  const uint16_t pad_x,
8444  const uint16_t pad_y,
8445  const uint16_t stride_x,
8446  const uint16_t stride_y,
8447  const q15_t *bias,
8448  const uint16_t bias_lshift,
8449  const uint16_t out_rshift,
8450  q15_t *out_tensor,
8451  const uint16_t out_tensor_dim_x,
8452  const uint16_t out_tensor_dim_y,
8453  q15_t *in_tmp_buf,
8454  q7_t *tmp_buf)
8455 {
8456 #if defined(__zcc__)
8457  return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8458  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8459  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8460  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8461  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8462 #else
8463  return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8464  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8465  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8466  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8467  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8468 #endif
8469 }
8470 
/**
 * @brief Wrapper for the depthwise HWC q7 convolution backend (shift-based bias/output scaling).
 *
 * Forwards all fifteen arguments in declaration order to
 * tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias() under __zcc__, and to
 * riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias() otherwise. Scratch-buffer
 * sizes and any channel-count constraints are not checked here.
 *
 * @param in_tensor q7_t input tensor (read-only)
 * @param in_tensor_dim input dimension
 * @param in_tensor_ch input channel count
 * @param ker_weight q7_t kernel weights (read-only)
 * @param out_tensor_ch output channel count
 * @param ker_dim kernel dimension
 * @param pad padding
 * @param stride stride
 * @param bias q7_t bias data (read-only)
 * @param bias_lshift,out_rshift shift amounts, forwarded as-is
 * @param out_tensor q7_t output tensor
 * @param out_tensor_dim output dimension
 * @param in_tmp_buf q15_t scratch buffer handed to the backend
 * @param tmp_buf q7_t scratch buffer handed to the backend
 * @return value returned by the backend; its meaning is defined there
 */
8522 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor,
8523  const uint16_t in_tensor_dim,
8524  const uint16_t in_tensor_ch,
8525  const q7_t *ker_weight,
8526  const uint16_t out_tensor_ch,
8527  const uint16_t ker_dim,
8528  const uint16_t pad,
8529  const uint16_t stride,
8530  const q7_t *bias,
8531  const uint16_t bias_lshift,
8532  const uint16_t out_rshift,
8533  q7_t *out_tensor,
8534  const uint16_t out_tensor_dim,
8535  q15_t *in_tmp_buf,
8536  q7_t *tmp_buf)
8537 {
8538 #if defined(__zcc__)
8539  return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8540  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8541  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8542  out_tensor_dim, in_tmp_buf, tmp_buf);
8543 #else
8544  return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8545  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8546  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8547  out_tensor_dim, in_tmp_buf, tmp_buf);
8548 #endif
8549 }
8550 
8613 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor,
8614  const uint16_t in_tensor_dim_x,
8615  const uint16_t in_tensor_dim_y,
8616  const uint16_t in_tensor_ch,
8617  const q7_t *ker_weight,
8618  const uint16_t out_tensor_ch,
8619  const uint16_t ker_dim_x,
8620  const uint16_t ker_dim_y,
8621  const uint16_t pad_x,
8622  const uint16_t pad_y,
8623  const uint16_t stride_x,
8624  const uint16_t stride_y,
8625  const q7_t *bias,
8626  const uint16_t bias_lshift,
8627  const uint16_t out_rshift,
8628  q7_t *out_tensor,
8629  const uint16_t out_tensor_dim_x,
8630  const uint16_t out_tensor_dim_y,
8631  q15_t *in_tmp_buf,
8632  q7_t *tmp_buf)
8633 {
8634 #if defined(__zcc__)
8635  return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8636  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8637  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8638  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8639  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8640 #else
8641  return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8642  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8643  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8644  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8645  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8646 #endif
8647 }
8648 
8694 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor,
8695  const uint16_t in_tensor_dim_x,
8696  const uint16_t in_tensor_dim_y,
8697  const uint16_t in_tensor_ch,
8698  const q7_t *ker_weight,
8699  const uint16_t out_tensor_ch,
8700  const uint16_t ker_dim_x,
8701  const uint16_t ker_dim_y,
8702  const uint16_t pad_x,
8703  const uint16_t pad_y,
8704  const uint16_t stride_x,
8705  const uint16_t stride_y,
8706  const q31_t *bias,
8707  const uint16_t pre_rshift,
8708  const uint16_t out_scale,
8709  const uint16_t post_rshift,
8710  q7_t *out_tensor,
8711  const uint16_t out_tensor_dim_x,
8712  const uint16_t out_tensor_dim_y,
8713  q15_t *in_tmp_buf)
8714 {
8715 #if defined(__zcc__)
8716 
8717 tpt_nn_conv_1x1_sym_params S1 = {stride_x, stride_y, pad_x, pad_y, pre_rshift, out_scale, post_rshift};
8718 tpt_nn_1x1_sym_dims S2 = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x, ker_dim_y,
8719  out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
8720  return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8721  out_tensor_ch, in_tensor, ker_weight, bias, &S1, &S2, in_tmp_buf);
8722 
8723 #else
8724  return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8725  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8726  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8727  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8728  out_tensor_dim_y, in_tmp_buf);
8729 #endif
8730 }
8731 
8778 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor,
8779  const uint16_t in_tensor_dim_x,
8780  const uint16_t in_tensor_dim_y,
8781  const uint16_t in_tensor_ch,
8782  const q7_t *ker_weight,
8783  const uint16_t out_tensor_ch,
8784  const uint16_t ker_dim_x,
8785  const uint16_t ker_dim_y,
8786  const uint16_t pad_x,
8787  const uint16_t pad_y,
8788  const uint16_t stride_x,
8789  const uint16_t stride_y,
8790  const q31_t *bias,
8791  const uint16_t pre_rshift,
8792  const uint16_t out_scale,
8793  const uint16_t post_rshift,
8794  q15_t *out_tensor,
8795  const uint16_t out_tensor_dim_x,
8796  const uint16_t out_tensor_dim_y,
8797  q15_t *in_tmp_buf)
8798 {
8799 #if defined(__zcc__)
8800  return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8801  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8802  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8803  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8804  out_tensor_dim_y, in_tmp_buf);
8805 #else
8806  return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8807  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8808  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8809  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8810  out_tensor_dim_y, in_tmp_buf);
8811 #endif
8812 }
8813 
8859 static inline int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor,
8860  const uint16_t in_tensor_dim_x,
8861  const uint16_t in_tensor_dim_y,
8862  const uint16_t in_tensor_ch,
8863  const q7_t *ker_weight,
8864  const uint16_t out_tensor_ch,
8865  const uint16_t ker_dim_x,
8866  const uint16_t ker_dim_y,
8867  const uint16_t pad_x,
8868  const uint16_t pad_y,
8869  const uint16_t stride_x,
8870  const uint16_t stride_y,
8871  const q31_t *bias,
8872  const uint16_t pre_rshift,
8873  const uint16_t out_scale,
8874  const uint16_t post_rshift,
8875  u8_t *out_tensor,
8876  const uint16_t out_tensor_dim_x,
8877  const uint16_t out_tensor_dim_y,
8878  q15_t *in_tmp_buf)
8879 {
8880 #if defined(__zcc__)
8881  return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8882  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8883  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8884  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8885  out_tensor_dim_y, in_tmp_buf);
8886 #else
8887  return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8888  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8889  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8890  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8891  out_tensor_dim_y, in_tmp_buf);
8892 #endif
8893 }
8894 
8941 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor,
8942  const uint16_t in_tensor_dim_x,
8943  const uint16_t in_tensor_dim_y,
8944  const uint16_t in_tensor_ch,
8945  const q7_t *ker_weight,
8946  const uint16_t out_tensor_ch,
8947  const uint16_t ker_dim_x,
8948  const uint16_t ker_dim_y,
8949  const uint16_t pad_x,
8950  const uint16_t pad_y,
8951  const uint16_t stride_x,
8952  const uint16_t stride_y,
8953  const q31_t *bias,
8954  const uint16_t pre_rshift,
8955  const uint16_t out_scale,
8956  const uint16_t post_rshift,
8957  q7_t *out_tensor,
8958  const uint16_t out_tensor_dim_x,
8959  const uint16_t out_tensor_dim_y,
8960  q15_t *in_tmp_buf)
8961 {
8962 #if defined(__zcc__)
8963  return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8964  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8965  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8966  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8967  out_tensor_dim_y, in_tmp_buf);
8968 #else
8969  return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8970  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8971  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8972  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8973  out_tensor_dim_y, in_tmp_buf);
8974 #endif
8975 }
8976 
9023 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor,
9024  const uint16_t in_tensor_dim_x,
9025  const uint16_t in_tensor_dim_y,
9026  const uint16_t in_tensor_ch,
9027  const q7_t *ker_weight,
9028  const uint16_t out_tensor_ch,
9029  const uint16_t ker_dim_x,
9030  const uint16_t ker_dim_y,
9031  const uint16_t pad_x,
9032  const uint16_t pad_y,
9033  const uint16_t stride_x,
9034  const uint16_t stride_y,
9035  const q31_t *bias,
9036  const uint16_t pre_rshift,
9037  const uint16_t out_scale,
9038  const uint16_t post_rshift,
9039  q15_t *out_tensor,
9040  const uint16_t out_tensor_dim_x,
9041  const uint16_t out_tensor_dim_y,
9042  q15_t *in_tmp_buf)
9043 {
9044 #if defined(__zcc__)
9045  return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9046  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9047  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9048  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9049  out_tensor_dim_y, in_tmp_buf);
9050 #else
9051  return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9052  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9053  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9054  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9055  out_tensor_dim_y, in_tmp_buf);
9056 #endif
9057 }
9058 
9103 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor,
9104  const uint16_t in_tensor_dim_x,
9105  const uint16_t in_tensor_dim_y,
9106  const uint16_t in_tensor_ch,
9107  const q7_t *ker_weight,
9108  const uint16_t out_tensor_ch,
9109  const uint16_t ker_dim_x,
9110  const uint16_t ker_dim_y,
9111  const uint16_t pad_x,
9112  const uint16_t pad_y,
9113  const uint16_t stride_x,
9114  const uint16_t stride_y,
9115  const uint16_t pre_rshift,
9116  const uint16_t out_scale,
9117  const uint16_t post_rshift,
9118  q7_t *out_tensor,
9119  const uint16_t out_tensor_dim_x,
9120  const uint16_t out_tensor_dim_y,
9121  q15_t *in_tmp_buf)
9122 {
9123 #if defined(__zcc__)
9124  return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9125  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9126  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9127  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9128  out_tensor_dim_y, in_tmp_buf);
9129 #else
9130  return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9131  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9132  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9133  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9134  out_tensor_dim_y, in_tmp_buf);
9135 #endif
9136 }
9137 
9183 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor,
9184  const uint16_t in_tensor_dim_x,
9185  const uint16_t in_tensor_dim_y,
9186  const uint16_t in_tensor_ch,
9187  const q7_t *ker_weight,
9188  const uint16_t out_tensor_ch,
9189  const uint16_t ker_dim_x,
9190  const uint16_t ker_dim_y,
9191  const uint16_t pad_x,
9192  const uint16_t pad_y,
9193  const uint16_t stride_x,
9194  const uint16_t stride_y,
9195  const uint16_t pre_rshift,
9196  const uint16_t out_scale,
9197  const uint16_t post_rshift,
9198  q15_t *out_tensor,
9199  const uint16_t out_tensor_dim_x,
9200  const uint16_t out_tensor_dim_y,
9201  q15_t *in_tmp_buf)
9202 {
9203 #if defined(__zcc__)
9204  return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9205  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9206  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9207  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9208  out_tensor_dim_y, in_tmp_buf);
9209 #else
9210  return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9211  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9212  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9213  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9214  out_tensor_dim_y, in_tmp_buf);
9215 #endif
9216 }
9217 
9262 static inline int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor,
9263  const uint16_t in_tensor_dim_x,
9264  const uint16_t in_tensor_dim_y,
9265  const uint16_t in_tensor_ch,
9266  const q7_t *ker_weight,
9267  const uint16_t out_tensor_ch,
9268  const uint16_t ker_dim_x,
9269  const uint16_t ker_dim_y,
9270  const uint16_t pad_x,
9271  const uint16_t pad_y,
9272  const uint16_t stride_x,
9273  const uint16_t stride_y,
9274  const uint16_t pre_rshift,
9275  const uint16_t out_scale,
9276  const uint16_t post_rshift,
9277  u8_t *out_tensor,
9278  const uint16_t out_tensor_dim_x,
9279  const uint16_t out_tensor_dim_y,
9280  q15_t *in_tmp_buf)
9281 {
9282 #if defined(__zcc__)
9283  return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9284  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9285  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9286  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9287  out_tensor_dim_y, in_tmp_buf);
9288 #else
9289  return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9290  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9291  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9292  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9293  out_tensor_dim_y, in_tmp_buf);
9294 #endif
9295 }
9296 
9342 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor,
9343  const uint16_t in_tensor_dim_x,
9344  const uint16_t in_tensor_dim_y,
9345  const uint16_t in_tensor_ch,
9346  const q7_t *ker_weight,
9347  const uint16_t out_tensor_ch,
9348  const uint16_t ker_dim_x,
9349  const uint16_t ker_dim_y,
9350  const uint16_t pad_x,
9351  const uint16_t pad_y,
9352  const uint16_t stride_x,
9353  const uint16_t stride_y,
9354  const uint16_t pre_rshift,
9355  const uint16_t out_scale,
9356  const uint16_t post_rshift,
9357  q7_t *out_tensor,
9358  const uint16_t out_tensor_dim_x,
9359  const uint16_t out_tensor_dim_y,
9360  q15_t *in_tmp_buf)
9361 {
9362 #if defined(__zcc__)
9363  return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9364  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9365  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9366  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9367  out_tensor_dim_y, in_tmp_buf);
9368 #else
9369  return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9370  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9371  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9372  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9373  out_tensor_dim_y, in_tmp_buf);
9374 #endif
9375 }
9376 
9422 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor,
9423  const uint16_t in_tensor_dim_x,
9424  const uint16_t in_tensor_dim_y,
9425  const uint16_t in_tensor_ch,
9426  const q7_t *ker_weight,
9427  const uint16_t out_tensor_ch,
9428  const uint16_t ker_dim_x,
9429  const uint16_t ker_dim_y,
9430  const uint16_t pad_x,
9431  const uint16_t pad_y,
9432  const uint16_t stride_x,
9433  const uint16_t stride_y,
9434  const uint16_t pre_rshift,
9435  const uint16_t out_scale,
9436  const uint16_t post_rshift,
9437  q15_t *out_tensor,
9438  const uint16_t out_tensor_dim_x,
9439  const uint16_t out_tensor_dim_y,
9440  q15_t *in_tmp_buf)
9441 {
9442 #if defined(__zcc__)
9443  return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9444  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9445  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9446  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9447  out_tensor_dim_y, in_tmp_buf);
9448 #else
9449  return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9450  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9451  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9452  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9453  out_tensor_dim_y, in_tmp_buf);
9454 #endif
9455 }
9456 
9488 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor,
9489  const uint16_t in_tensor_dim,
9490  const q7_t *ker_weight,
9491  const uint16_t out_tensor_ch,
9492  const uint16_t ker_dim,
9493  const uint16_t pad,
9494  const uint16_t stride,
9495  const q31_t *bias,
9496  const uint16_t pre_rshift,
9497  const uint16_t out_scale,
9498  const uint16_t post_rshift,
9499  q7_t *out_tensor,
9500  const uint16_t out_tensor_dim,
9501  q15_t *in_tmp_buf,
9502  q15_t *wt_tmp_buf)
9503 {
9504 #if defined(__zcc__)
9505  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9506  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9507  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9508  in_tmp_buf, wt_tmp_buf);
9509 #else
9510  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9511  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9512  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9513  in_tmp_buf, wt_tmp_buf);
9514 #endif
9515 }
9516 
9549 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor,
9550  const uint16_t in_tensor_dim,
9551  const q7_t *ker_weight,
9552  const uint16_t out_tensor_ch,
9553  const uint16_t ker_dim,
9554  const uint16_t pad,
9555  const uint16_t stride,
9556  const q31_t *bias,
9557  const uint16_t pre_rshift,
9558  const uint16_t out_scale,
9559  const uint16_t post_rshift,
9560  q15_t *out_tensor,
9561  const uint16_t out_tensor_dim,
9562  q15_t *in_tmp_buf,
9563  q15_t *wt_tmp_buf)
9564 {
9565 #if defined(__zcc__)
9566  return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9567  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9568  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9569  in_tmp_buf, wt_tmp_buf);
9570 #else
9571  return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9572  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9573  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9574  in_tmp_buf, wt_tmp_buf);
9575 #endif
9576 }
9577 
9609 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
9610  const uint16_t in_tensor_dim,
9611  const q7_t *ker_weight,
9612  const uint16_t out_tensor_ch,
9613  const uint16_t ker_dim,
9614  const uint16_t pad,
9615  const uint16_t stride,
9616  const q31_t *bias,
9617  const uint16_t pre_rshift,
9618  const uint16_t out_scale,
9619  const uint16_t post_rshift,
9620  u8_t *out_tensor,
9621  const uint16_t out_tensor_dim,
9622  q15_t *in_tmp_buf,
9623  q15_t *wt_tmp_buf)
9624 {
9625 #if defined(__zcc__)
9626  return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9627  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9628  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9629  in_tmp_buf, wt_tmp_buf);
9630 #else
9631  return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9632  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9633  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9634  in_tmp_buf, wt_tmp_buf);
9635 #endif
9636 }
9637 
9669 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
9670  const uint16_t in_tensor_dim,
9671  const q7_t *ker_weight,
9672  const uint16_t out_tensor_ch,
9673  const uint16_t ker_dim,
9674  const uint16_t pad,
9675  const uint16_t stride,
9676  const q31_t *bias,
9677  const uint16_t pre_rshift,
9678  const uint16_t out_scale,
9679  const uint16_t post_rshift,
9680  q7_t *out_tensor,
9681  const uint16_t out_tensor_dim,
9682  q15_t *in_tmp_buf,
9683  q15_t *wt_tmp_buf)
9684 {
9685 #if defined(__zcc__)
9686  return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9687  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9688  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9689  in_tmp_buf, wt_tmp_buf);
9690 #else
9691  return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9692  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9693  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9694  in_tmp_buf, wt_tmp_buf);
9695 #endif
9696 }
9697 
9730 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
9731  const uint16_t in_tensor_dim,
9732  const q7_t *ker_weight,
9733  const uint16_t out_tensor_ch,
9734  const uint16_t ker_dim,
9735  const uint16_t pad,
9736  const uint16_t stride,
9737  const q31_t *bias,
9738  const uint16_t pre_rshift,
9739  const uint16_t out_scale,
9740  const uint16_t post_rshift,
9741  q15_t *out_tensor,
9742  const uint16_t out_tensor_dim,
9743  q15_t *in_tmp_buf,
9744  q15_t *wt_tmp_buf)
9745 {
9746 #if defined(__zcc__)
9747  return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9748  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9749  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9750  in_tmp_buf, wt_tmp_buf);
9751 #else
9752  return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9753  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9754  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9755  in_tmp_buf, wt_tmp_buf);
9756 #endif
9757 }
9758 
9789 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor,
9790  const uint16_t in_tensor_dim,
9791  const q7_t *ker_weight,
9792  const uint16_t out_tensor_ch,
9793  const uint16_t ker_dim,
9794  const uint16_t pad,
9795  const uint16_t stride,
9796  const uint16_t pre_rshift,
9797  const uint16_t out_scale,
9798  const uint16_t post_rshift,
9799  q7_t *out_tensor,
9800  const uint16_t out_tensor_dim,
9801  q15_t *in_tmp_buf,
9802  q15_t *wt_tmp_buf)
9803 {
9804 #if defined(__zcc__)
9805  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9806  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9807  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9808  in_tmp_buf, wt_tmp_buf);
9809 #else
9810  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9811  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9812  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9813  in_tmp_buf, wt_tmp_buf);
9814 #endif
9815 }
9816 
9847 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor,
9848  const uint16_t in_tensor_dim,
9849  const q7_t *ker_weight,
9850  const uint16_t out_tensor_ch,
9851  const uint16_t ker_dim,
9852  const uint16_t pad,
9853  const uint16_t stride,
9854  const uint16_t pre_rshift,
9855  const uint16_t out_scale,
9856  const uint16_t post_rshift,
9857  q15_t *out_tensor,
9858  const uint16_t out_tensor_dim,
9859  q15_t *in_tmp_buf,
9860  q15_t *wt_tmp_buf)
9861 {
9862 #if defined(__zcc__)
9863  return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9864  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9865  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9866  in_tmp_buf, wt_tmp_buf);
9867 #else
9868  return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9869  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9870  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9871  in_tmp_buf, wt_tmp_buf);
9872 #endif
9873 }
9874 
9905 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor,
9906  const uint16_t in_tensor_dim,
9907  const q7_t *ker_weight,
9908  const uint16_t out_tensor_ch,
9909  const uint16_t ker_dim,
9910  const uint16_t pad,
9911  const uint16_t stride,
9912  const uint16_t pre_rshift,
9913  const uint16_t out_scale,
9914  const uint16_t post_rshift,
9915  u8_t *out_tensor,
9916  const uint16_t out_tensor_dim,
9917  q15_t *in_tmp_buf,
9918  q15_t *wt_tmp_buf)
9919 {
9920 #if defined(__zcc__)
9921  return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9922  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9923  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9924  in_tmp_buf, wt_tmp_buf);
9925 #else
9926  return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9927  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9928  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9929  in_tmp_buf, wt_tmp_buf);
9930 #endif
9931 }
9932 
9963 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor,
9964  const uint16_t in_tensor_dim,
9965  const q7_t *ker_weight,
9966  const uint16_t out_tensor_ch,
9967  const uint16_t ker_dim,
9968  const uint16_t pad,
9969  const uint16_t stride,
9970  const uint16_t pre_rshift,
9971  const uint16_t out_scale,
9972  const uint16_t post_rshift,
9973  q7_t *out_tensor,
9974  const uint16_t out_tensor_dim,
9975  q15_t *in_tmp_buf,
9976  q15_t *wt_tmp_buf)
9977 {
9978 #if defined(__zcc__)
9979  return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9980  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9981  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9982  in_tmp_buf, wt_tmp_buf);
9983 #else
9984  return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9985  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9986  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9987  in_tmp_buf, wt_tmp_buf);
9988 #endif
9989 }
9990 
10021 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor,
10022  const uint16_t in_tensor_dim,
10023  const q7_t *ker_weight,
10024  const uint16_t out_tensor_ch,
10025  const uint16_t ker_dim,
10026  const uint16_t pad,
10027  const uint16_t stride,
10028  const uint16_t pre_rshift,
10029  const uint16_t out_scale,
10030  const uint16_t post_rshift,
10031  q15_t *out_tensor,
10032  const uint16_t out_tensor_dim,
10033  q15_t *in_tmp_buf,
10034  q15_t *wt_tmp_buf)
10035 {
10036 #if defined(__zcc__)
10037  return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
10038  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10039  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10040  in_tmp_buf, wt_tmp_buf);
10041 #else
10042  return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
10043  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10044  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10045  in_tmp_buf, wt_tmp_buf);
10046 #endif
10047 }
10048 
10079 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor,
10080  const uint16_t in_tensor_dim,
10081  const uint16_t in_tensor_ch,
10082  const q7_t *ker_weight,
10083  const uint16_t out_tensor_ch,
10084  const uint16_t ker_dim,
10085  const uint16_t pad,
10086  const uint16_t stride,
10087  const q31_t *bias,
10088  const uint16_t pre_rshift,
10089  const uint16_t out_scale,
10090  const uint16_t post_rshift,
10091  q7_t *out_tensor,
10092  const uint16_t out_tensor_dim,
10093  q15_t *in_tmp_buf)
10094 {
10095 #if defined(__zcc__)
10096  return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10097  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10098  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10099  out_tensor, out_tensor_dim, in_tmp_buf);
10100 #else
10101  return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10102  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10103  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10104  out_tensor, out_tensor_dim, in_tmp_buf);
10105 #endif
10106 }
10107 
10138 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor,
10139  const uint16_t in_tensor_dim,
10140  const uint16_t in_tensor_ch,
10141  const q7_t *ker_weight,
10142  const uint16_t out_tensor_ch,
10143  const uint16_t ker_dim,
10144  const uint16_t pad,
10145  const uint16_t stride,
10146  const q31_t *bias,
10147  const uint16_t pre_rshift,
10148  const uint16_t out_scale,
10149  const uint16_t post_rshift,
10150  q15_t *out_tensor,
10151  const uint16_t out_tensor_dim,
10152  q15_t *in_tmp_buf)
10153 {
10154 #if defined(__zcc__)
10155  return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10156  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10157  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10158  out_tensor, out_tensor_dim, in_tmp_buf);
10159 #else
10160  return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10161  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10162  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10163  out_tensor, out_tensor_dim, in_tmp_buf);
10164 #endif
10165 }
10166 
10197 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor,
10198  const uint16_t in_tensor_dim,
10199  const uint16_t in_tensor_ch,
10200  const q7_t *ker_weight,
10201  const uint16_t out_tensor_ch,
10202  const uint16_t ker_dim,
10203  const uint16_t pad,
10204  const uint16_t stride,
10205  const q31_t *bias,
10206  const uint16_t pre_rshift,
10207  const uint16_t out_scale,
10208  const uint16_t post_rshift,
10209  u8_t *out_tensor,
10210  const uint16_t out_tensor_dim,
10211  q15_t *in_tmp_buf)
10212 {
10213 #if defined(__zcc__)
10214  return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10215  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10216  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10217  out_tensor, out_tensor_dim, in_tmp_buf);
10218 #else
10219  return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10220  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10221  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10222  out_tensor, out_tensor_dim, in_tmp_buf);
10223 #endif
10224 }
10225 
10256 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor,
10257  const uint16_t in_tensor_dim,
10258  const uint16_t in_tensor_ch,
10259  const q7_t *ker_weight,
10260  const uint16_t out_tensor_ch,
10261  const uint16_t ker_dim,
10262  const uint16_t pad,
10263  const uint16_t stride,
10264  const q31_t *bias,
10265  const uint16_t pre_rshift,
10266  const uint16_t out_scale,
10267  const uint16_t post_rshift,
10268  q7_t *out_tensor,
10269  const uint16_t out_tensor_dim,
10270  q15_t *in_tmp_buf)
10271 {
10272 #if defined(__zcc__)
10273  return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10274  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10275  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10276  out_tensor, out_tensor_dim, in_tmp_buf);
10277 #else
10278  return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10279  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10280  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10281  out_tensor, out_tensor_dim, in_tmp_buf);
10282 #endif
10283 }
10284 
10315 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor,
10316  const uint16_t in_tensor_dim,
10317  const uint16_t in_tensor_ch,
10318  const q7_t *ker_weight,
10319  const uint16_t out_tensor_ch,
10320  const uint16_t ker_dim,
10321  const uint16_t pad,
10322  const uint16_t stride,
10323  const q31_t *bias,
10324  const uint16_t pre_rshift,
10325  const uint16_t out_scale,
10326  const uint16_t post_rshift,
10327  q15_t *out_tensor,
10328  const uint16_t out_tensor_dim,
10329  q15_t *in_tmp_buf)
10330 {
10331 #if defined(__zcc__)
10332  return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10333  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10334  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10335  out_tensor, out_tensor_dim, in_tmp_buf);
10336 #else
10337  return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10338  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10339  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10340  out_tensor, out_tensor_dim, in_tmp_buf);
10341 #endif
10342 }
10343 
10373 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor,
10374  const uint16_t in_tensor_dim,
10375  const uint16_t in_tensor_ch,
10376  const q7_t *ker_weight,
10377  const uint16_t out_tensor_ch,
10378  const uint16_t ker_dim,
10379  const uint16_t pad,
10380  const uint16_t stride,
10381  const uint16_t pre_rshift,
10382  const uint16_t out_scale,
10383  const uint16_t post_rshift,
10384  q7_t *out_tensor,
10385  const uint16_t out_tensor_dim,
10386  q15_t *in_tmp_buf)
10387 {
10388 #if defined(__zcc__)
10389  return tpt_nn_conv_HWC_s8_s8_s8_sym_fast(
10390  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10391  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10392  out_tensor_dim, in_tmp_buf);
10393 #else
10394  return riscv_nn_conv_HWC_s8_s8_s8_sym_fast(
10395  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10396  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10397  out_tensor_dim, in_tmp_buf);
10398 #endif
10399 }
10400 
10430 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor,
10431  const uint16_t in_tensor_dim,
10432  const uint16_t in_tensor_ch,
10433  const q7_t *ker_weight,
10434  const uint16_t out_tensor_ch,
10435  const uint16_t ker_dim,
10436  const uint16_t pad,
10437  const uint16_t stride,
10438  const uint16_t pre_rshift,
10439  const uint16_t out_scale,
10440  const uint16_t post_rshift,
10441  q15_t *out_tensor,
10442  const uint16_t out_tensor_dim,
10443  q15_t *in_tmp_buf)
10444 {
10445 #if defined(__zcc__)
10446  return tpt_nn_conv_HWC_s8_s16_s8_sym_fast(
10447  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10448  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10449  out_tensor_dim, in_tmp_buf);
10450 #else
10451  return riscv_nn_conv_HWC_s8_s16_s8_sym_fast(
10452  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10453  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10454  out_tensor_dim, in_tmp_buf);
10455 #endif
10456 }
10457 
10487 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor,
10488  const uint16_t in_tensor_dim,
10489  const uint16_t in_tensor_ch,
10490  const q7_t *ker_weight,
10491  const uint16_t out_tensor_ch,
10492  const uint16_t ker_dim,
10493  const uint16_t pad,
10494  const uint16_t stride,
10495  const uint16_t pre_rshift,
10496  const uint16_t out_scale,
10497  const uint16_t post_rshift,
10498  u8_t *out_tensor,
10499  const uint16_t out_tensor_dim,
10500  q15_t *in_tmp_buf)
10501 {
10502 #if defined(__zcc__)
10503  return tpt_nn_conv_HWC_u8_u8_s8_sym_fast(
10504  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10505  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10506  out_tensor_dim, in_tmp_buf);
10507 #else
10508  return riscv_nn_conv_HWC_u8_u8_s8_sym_fast(
10509  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10510  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10511  out_tensor_dim, in_tmp_buf);
10512 #endif
10513 }
10514 
10544 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor,
10545  const uint16_t in_tensor_dim,
10546  const uint16_t in_tensor_ch,
10547  const q7_t *ker_weight,
10548  const uint16_t out_tensor_ch,
10549  const uint16_t ker_dim,
10550  const uint16_t pad,
10551  const uint16_t stride,
10552  const uint16_t pre_rshift,
10553  const uint16_t out_scale,
10554  const uint16_t post_rshift,
10555  q7_t *out_tensor,
10556  const uint16_t out_tensor_dim,
10557  q15_t *in_tmp_buf)
10558 {
10559 #if defined(__zcc__)
10560  return tpt_nn_conv_HWC_u8_s8_s8_sym_fast(
10561  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10562  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10563  out_tensor_dim, in_tmp_buf);
10564 #else
10565  return riscv_nn_conv_HWC_u8_s8_s8_sym_fast(
10566  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10567  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10568  out_tensor_dim, in_tmp_buf);
10569 #endif
10570 }
10571 
10601 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor,
10602  const uint16_t in_tensor_dim,
10603  const uint16_t in_tensor_ch,
10604  const q7_t *ker_weight,
10605  const uint16_t out_tensor_ch,
10606  const uint16_t ker_dim,
10607  const uint16_t pad,
10608  const uint16_t stride,
10609  const uint16_t pre_rshift,
10610  const uint16_t out_scale,
10611  const uint16_t post_rshift,
10612  q15_t *out_tensor,
10613  const uint16_t out_tensor_dim,
10614  q15_t *in_tmp_buf)
10615 {
10616 #if defined(__zcc__)
10617  return tpt_nn_conv_HWC_u8_s16_s8_sym_fast(
10618  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10619  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10620  out_tensor_dim, in_tmp_buf);
10621 #else
10622  return riscv_nn_conv_HWC_u8_s16_s8_sym_fast(
10623  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10624  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10625  out_tensor_dim, in_tmp_buf);
10626 #endif
10627 }
10628 
10665 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor,
10666  const uint16_t in_tensor_dim_x,
10667  const uint16_t in_tensor_dim_y,
10668  const uint16_t in_tensor_ch,
10669  const q7_t *ker_weight,
10670  const uint16_t out_tensor_ch,
10671  const uint16_t ker_dim_x,
10672  const uint16_t ker_dim_y,
10673  const uint16_t pad_x,
10674  const uint16_t pad_y,
10675  const uint16_t stride_x,
10676  const uint16_t stride_y,
10677  const q31_t *bias,
10678  const uint16_t pre_rshift,
10679  const uint16_t out_scale,
10680  const uint16_t post_rshift,
10681  q7_t *out_tensor,
10682  const uint16_t out_tensor_dim_x,
10683  const uint16_t out_tensor_dim_y,
10684  q15_t *in_tmp_buf)
10685 {
10686 #if defined(__zcc__)
10687  return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10688  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10689  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10690  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10691  out_tensor_dim_y, in_tmp_buf);
10692 #else
10693  return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10694  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10695  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10696  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10697  out_tensor_dim_y, in_tmp_buf);
10698 #endif
10699 }
10700 
10738 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor,
10739  const uint16_t in_tensor_dim_x,
10740  const uint16_t in_tensor_dim_y,
10741  const uint16_t in_tensor_ch,
10742  const q7_t *ker_weight,
10743  const uint16_t out_tensor_ch,
10744  const uint16_t ker_dim_x,
10745  const uint16_t ker_dim_y,
10746  const uint16_t pad_x,
10747  const uint16_t pad_y,
10748  const uint16_t stride_x,
10749  const uint16_t stride_y,
10750  const q31_t *bias,
10751  const uint16_t pre_rshift,
10752  const uint16_t out_scale,
10753  const uint16_t post_rshift,
10754  q15_t *out_tensor,
10755  const uint16_t out_tensor_dim_x,
10756  const uint16_t out_tensor_dim_y,
10757  q15_t *in_tmp_buf)
10758 {
10759 #if defined(__zcc__)
10760  return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10761  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10762  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10763  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10764  out_tensor_dim_y, in_tmp_buf);
10765 #else
10766  return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10767  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10768  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10769  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10770  out_tensor_dim_y, in_tmp_buf);
10771 #endif
10772 }
10773 
10810 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor,
10811  const uint16_t in_tensor_dim_x,
10812  const uint16_t in_tensor_dim_y,
10813  const uint16_t in_tensor_ch,
10814  const q7_t *ker_weight,
10815  const uint16_t out_tensor_ch,
10816  const uint16_t ker_dim_x,
10817  const uint16_t ker_dim_y,
10818  const uint16_t pad_x,
10819  const uint16_t pad_y,
10820  const uint16_t stride_x,
10821  const uint16_t stride_y,
10822  const q31_t *bias,
10823  const uint16_t pre_rshift,
10824  const uint16_t out_scale,
10825  const uint16_t post_rshift,
10826  u8_t *out_tensor,
10827  const uint16_t out_tensor_dim_x,
10828  const uint16_t out_tensor_dim_y,
10829  q15_t *in_tmp_buf)
10830 {
10831 #if defined(__zcc__)
10832  return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10833  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10834  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10835  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10836  out_tensor_dim_y, in_tmp_buf);
10837 #else
10838  return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10839  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10840  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10841  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10842  out_tensor_dim_y, in_tmp_buf);
10843 #endif
10844 }
10845 
10883 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor,
10884  const uint16_t in_tensor_dim_x,
10885  const uint16_t in_tensor_dim_y,
10886  const uint16_t in_tensor_ch,
10887  const q7_t *ker_weight,
10888  const uint16_t out_tensor_ch,
10889  const uint16_t ker_dim_x,
10890  const uint16_t ker_dim_y,
10891  const uint16_t pad_x,
10892  const uint16_t pad_y,
10893  const uint16_t stride_x,
10894  const uint16_t stride_y,
10895  const q31_t *bias,
10896  const uint16_t pre_rshift,
10897  const uint16_t out_scale,
10898  const uint16_t post_rshift,
10899  q7_t *out_tensor,
10900  const uint16_t out_tensor_dim_x,
10901  const uint16_t out_tensor_dim_y,
10902  q15_t *in_tmp_buf)
10903 {
10904 #if defined(__zcc__)
10905  return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10906  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10907  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10908  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10909  out_tensor_dim_y, in_tmp_buf);
10910 #else
10911  return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10912  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10913  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10914  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10915  out_tensor_dim_y, in_tmp_buf);
10916 #endif
10917 }
10918 
10956 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor,
10957  const uint16_t in_tensor_dim_x,
10958  const uint16_t in_tensor_dim_y,
10959  const uint16_t in_tensor_ch,
10960  const q7_t *ker_weight,
10961  const uint16_t out_tensor_ch,
10962  const uint16_t ker_dim_x,
10963  const uint16_t ker_dim_y,
10964  const uint16_t pad_x,
10965  const uint16_t pad_y,
10966  const uint16_t stride_x,
10967  const uint16_t stride_y,
10968  const q31_t *bias,
10969  const uint16_t pre_rshift,
10970  const uint16_t out_scale,
10971  const uint16_t post_rshift,
10972  q15_t *out_tensor,
10973  const uint16_t out_tensor_dim_x,
10974  const uint16_t out_tensor_dim_y,
10975  q15_t *in_tmp_buf)
10976 {
10977 #if defined(__zcc__)
10978  return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10979  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10980  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10981  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10982  out_tensor_dim_y, in_tmp_buf);
10983 #else
10984  return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10985  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10986  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10987  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10988  out_tensor_dim_y, in_tmp_buf);
10989 #endif
10990 }
10991 
11027 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor,
11028  const uint16_t in_tensor_dim_x,
11029  const uint16_t in_tensor_dim_y,
11030  const uint16_t in_tensor_ch,
11031  const q7_t *ker_weight,
11032  const uint16_t out_tensor_ch,
11033  const uint16_t ker_dim_x,
11034  const uint16_t ker_dim_y,
11035  const uint16_t pad_x,
11036  const uint16_t pad_y,
11037  const uint16_t stride_x,
11038  const uint16_t stride_y,
11039  const uint16_t pre_rshift,
11040  const uint16_t out_scale,
11041  const uint16_t post_rshift,
11042  q7_t *out_tensor,
11043  const uint16_t out_tensor_dim_x,
11044  const uint16_t out_tensor_dim_y,
11045  q15_t *in_tmp_buf)
11046 {
11047 #if defined(__zcc__)
11048  return tpt_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11049  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11050  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11051  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11052  out_tensor_dim_y, in_tmp_buf);
11053 #else
11054  return riscv_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11055  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11056  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11057  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11058  out_tensor_dim_y, in_tmp_buf);
11059 #endif
11060 }
11061 
11097 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor,
11098  const uint16_t in_tensor_dim_x,
11099  const uint16_t in_tensor_dim_y,
11100  const uint16_t in_tensor_ch,
11101  const q7_t *ker_weight,
11102  const uint16_t out_tensor_ch,
11103  const uint16_t ker_dim_x,
11104  const uint16_t ker_dim_y,
11105  const uint16_t pad_x,
11106  const uint16_t pad_y,
11107  const uint16_t stride_x,
11108  const uint16_t stride_y,
11109  const uint16_t pre_rshift,
11110  const uint16_t out_scale,
11111  const uint16_t post_rshift,
11112  q15_t *out_tensor,
11113  const uint16_t out_tensor_dim_x,
11114  const uint16_t out_tensor_dim_y,
11115  q15_t *in_tmp_buf)
11116 {
11117 #if defined(__zcc__)
11118  return tpt_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11119  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11120  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11121  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11122  out_tensor_dim_y, in_tmp_buf);
11123 #else
11124  return riscv_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11125  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11126  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11127  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11128  out_tensor_dim_y, in_tmp_buf);
11129 #endif
11130 }
11131 
11167 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor,
11168  const uint16_t in_tensor_dim_x,
11169  const uint16_t in_tensor_dim_y,
11170  const uint16_t in_tensor_ch,
11171  const q7_t *ker_weight,
11172  const uint16_t out_tensor_ch,
11173  const uint16_t ker_dim_x,
11174  const uint16_t ker_dim_y,
11175  const uint16_t pad_x,
11176  const uint16_t pad_y,
11177  const uint16_t stride_x,
11178  const uint16_t stride_y,
11179  const uint16_t pre_rshift,
11180  const uint16_t out_scale,
11181  const uint16_t post_rshift,
11182  u8_t *out_tensor,
11183  const uint16_t out_tensor_dim_x,
11184  const uint16_t out_tensor_dim_y,
11185  q15_t *in_tmp_buf)
11186 {
11187 #if defined(__zcc__)
11188  return tpt_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11189  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11190  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11191  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11192  out_tensor_dim_y, in_tmp_buf);
11193 #else
11194  return riscv_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11195  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11196  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11197  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11198  out_tensor_dim_y, in_tmp_buf);
11199 #endif
11200 }
11201 
11237 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor,
11238  const uint16_t in_tensor_dim_x,
11239  const uint16_t in_tensor_dim_y,
11240  const uint16_t in_tensor_ch,
11241  const q7_t *ker_weight,
11242  const uint16_t out_tensor_ch,
11243  const uint16_t ker_dim_x,
11244  const uint16_t ker_dim_y,
11245  const uint16_t pad_x,
11246  const uint16_t pad_y,
11247  const uint16_t stride_x,
11248  const uint16_t stride_y,
11249  const uint16_t pre_rshift,
11250  const uint16_t out_scale,
11251  const uint16_t post_rshift,
11252  q7_t *out_tensor,
11253  const uint16_t out_tensor_dim_x,
11254  const uint16_t out_tensor_dim_y,
11255  q15_t *in_tmp_buf)
11256 {
11257 #if defined(__zcc__)
11258  return tpt_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11259  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11260  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11261  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11262  out_tensor_dim_y, in_tmp_buf);
11263 #else
11264  return riscv_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11265  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11266  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11267  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11268  out_tensor_dim_y, in_tmp_buf);
11269 #endif
11270 }
11271 
11307 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor,
11308  const uint16_t in_tensor_dim_x,
11309  const uint16_t in_tensor_dim_y,
11310  const uint16_t in_tensor_ch,
11311  const q7_t *ker_weight,
11312  const uint16_t out_tensor_ch,
11313  const uint16_t ker_dim_x,
11314  const uint16_t ker_dim_y,
11315  const uint16_t pad_x,
11316  const uint16_t pad_y,
11317  const uint16_t stride_x,
11318  const uint16_t stride_y,
11319  const uint16_t pre_rshift,
11320  const uint16_t out_scale,
11321  const uint16_t post_rshift,
11322  q15_t *out_tensor,
11323  const uint16_t out_tensor_dim_x,
11324  const uint16_t out_tensor_dim_y,
11325  q15_t *in_tmp_buf)
11326 {
11327 #if defined(__zcc__)
11328  return tpt_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11329  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11330  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11331  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11332  out_tensor_dim_y, in_tmp_buf);
11333 #else
11334  return riscv_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11335  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11336  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11337  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11338  out_tensor_dim_y, in_tmp_buf);
11339 #endif
11340 }
11341 
11342 
11373 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor,
11374  const uint16_t in_tensor_dim,
11375  const uint16_t in_tensor_ch,
11376  const q7_t *ker_weight,
11377  const uint16_t out_tensor_ch,
11378  const uint16_t ker_dim,
11379  const uint16_t pad,
11380  const uint16_t stride,
11381  const q31_t *bias,
11382  const uint16_t pre_rshift,
11383  const uint16_t out_scale,
11384  const uint16_t post_rshift,
11385  q7_t *out_tensor,
11386  const uint16_t out_tensor_dim,
11387  q15_t *in_tmp_buf)
11388 {
11389 #if defined(__zcc__)
11390  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11391  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11392  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11393  out_tensor, out_tensor_dim, in_tmp_buf);
11394 #else
11395  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11396  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11397  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11398  out_tensor, out_tensor_dim, in_tmp_buf);
11399 #endif
11400 }
11401 
11432 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor,
11433  const uint16_t in_tensor_dim,
11434  const uint16_t in_tensor_ch,
11435  const q7_t *ker_weight,
11436  const uint16_t out_tensor_ch,
11437  const uint16_t ker_dim,
11438  const uint16_t pad,
11439  const uint16_t stride,
11440  const q31_t *bias,
11441  const uint16_t pre_rshift,
11442  const uint16_t out_scale,
11443  const uint16_t post_rshift,
11444  q15_t *out_tensor,
11445  const uint16_t out_tensor_dim,
11446  q15_t *in_tmp_buf)
11447 {
11448 #if defined(__zcc__)
11449  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11450  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11451  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11452  out_tensor, out_tensor_dim, in_tmp_buf);
11453 #else
11454  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11455  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11456  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11457  out_tensor, out_tensor_dim, in_tmp_buf);
11458 #endif
11459 }
11460 
11491 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor,
11492  const uint16_t in_tensor_dim,
11493  const uint16_t in_tensor_ch,
11494  const q7_t *ker_weight,
11495  const uint16_t out_tensor_ch,
11496  const uint16_t ker_dim,
11497  const uint16_t pad,
11498  const uint16_t stride,
11499  const q31_t *bias,
11500  const uint16_t pre_rshift,
11501  const uint16_t out_scale,
11502  const uint16_t post_rshift,
11503  u8_t *out_tensor,
11504  const uint16_t out_tensor_dim,
11505  q15_t *in_tmp_buf)
11506 {
11507 #if defined(__zcc__)
11508  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11509  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11510  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11511  out_tensor, out_tensor_dim, in_tmp_buf);
11512 #else
11513  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11514  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11515  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11516  out_tensor, out_tensor_dim, in_tmp_buf);
11517 #endif
11518 }
11519 
11550 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor,
11551  const uint16_t in_tensor_dim,
11552  const uint16_t in_tensor_ch,
11553  const q7_t *ker_weight,
11554  const uint16_t out_tensor_ch,
11555  const uint16_t ker_dim,
11556  const uint16_t pad,
11557  const uint16_t stride,
11558  const q31_t *bias,
11559  const uint16_t pre_rshift,
11560  const uint16_t out_scale,
11561  const uint16_t post_rshift,
11562  q7_t *out_tensor,
11563  const uint16_t out_tensor_dim,
11564  q15_t *in_tmp_buf)
11565 {
11566 #if defined(__zcc__)
11567  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11568  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11569  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11570  out_tensor, out_tensor_dim, in_tmp_buf);
11571 #else
11572  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11573  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11574  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11575  out_tensor, out_tensor_dim, in_tmp_buf);
11576 #endif
11577 }
11578 
11609 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor,
11610  const uint16_t in_tensor_dim,
11611  const uint16_t in_tensor_ch,
11612  const q7_t *ker_weight,
11613  const uint16_t out_tensor_ch,
11614  const uint16_t ker_dim,
11615  const uint16_t pad,
11616  const uint16_t stride,
11617  const q31_t *bias,
11618  const uint16_t pre_rshift,
11619  const uint16_t out_scale,
11620  const uint16_t post_rshift,
11621  q15_t *out_tensor,
11622  const uint16_t out_tensor_dim,
11623  q15_t *in_tmp_buf)
11624 {
11625 #if defined(__zcc__)
11626  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11627  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11628  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11629  out_tensor, out_tensor_dim, in_tmp_buf);
11630 #else
11631  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11632  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11633  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11634  out_tensor, out_tensor_dim, in_tmp_buf);
11635 #endif
11636 }
11637 
11667 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor,
11668  const uint16_t in_tensor_dim,
11669  const uint16_t in_tensor_ch,
11670  const q7_t *ker_weight,
11671  const uint16_t out_tensor_ch,
11672  const uint16_t ker_dim,
11673  const uint16_t pad,
11674  const uint16_t stride,
11675  const uint16_t pre_rshift,
11676  const uint16_t out_scale,
11677  const uint16_t post_rshift,
11678  q7_t *out_tensor,
11679  const uint16_t out_tensor_dim,
11680  q15_t *in_tmp_buf)
11681 {
11682 #if defined(__zcc__)
11683  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym(
11684  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11685  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11686  out_tensor_dim, in_tmp_buf);
11687 #else
11688  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym(
11689  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11690  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11691  out_tensor_dim, in_tmp_buf);
11692 #endif
11693 }
11694 
11724 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor,
11725  const uint16_t in_tensor_dim,
11726  const uint16_t in_tensor_ch,
11727  const q7_t *ker_weight,
11728  const uint16_t out_tensor_ch,
11729  const uint16_t ker_dim,
11730  const uint16_t pad,
11731  const uint16_t stride,
11732  const uint16_t pre_rshift,
11733  const uint16_t out_scale,
11734  const uint16_t post_rshift,
11735  q15_t *out_tensor,
11736  const uint16_t out_tensor_dim,
11737  q15_t *in_tmp_buf)
11738 {
11739 #if defined(__zcc__)
11740  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym(
11741  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11742  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11743  out_tensor_dim, in_tmp_buf);
11744 #else
11745  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym(
11746  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11747  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11748  out_tensor_dim, in_tmp_buf);
11749 #endif
11750 }
11751 
11781 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor,
11782  const uint16_t in_tensor_dim,
11783  const uint16_t in_tensor_ch,
11784  const q7_t *ker_weight,
11785  const uint16_t out_tensor_ch,
11786  const uint16_t ker_dim,
11787  const uint16_t pad,
11788  const uint16_t stride,
11789  const uint16_t pre_rshift,
11790  const uint16_t out_scale,
11791  const uint16_t post_rshift,
11792  u8_t *out_tensor,
11793  const uint16_t out_tensor_dim,
11794  q15_t *in_tmp_buf)
11795 {
11796 #if defined(__zcc__)
11797  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym(
11798  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11799  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11800  out_tensor_dim, in_tmp_buf);
11801 #else
11802  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym(
11803  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11804  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11805  out_tensor_dim, in_tmp_buf);
11806 #endif
11807 }
11808 
11838 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor,
11839  const uint16_t in_tensor_dim,
11840  const uint16_t in_tensor_ch,
11841  const q7_t *ker_weight,
11842  const uint16_t out_tensor_ch,
11843  const uint16_t ker_dim,
11844  const uint16_t pad,
11845  const uint16_t stride,
11846  const uint16_t pre_rshift,
11847  const uint16_t out_scale,
11848  const uint16_t post_rshift,
11849  q7_t *out_tensor,
11850  const uint16_t out_tensor_dim,
11851  q15_t *in_tmp_buf)
11852 {
11853 #if defined(__zcc__)
11854  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym(
11855  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11856  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11857  out_tensor_dim, in_tmp_buf);
11858 #else
11859  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym(
11860  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11861  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11862  out_tensor_dim, in_tmp_buf);
11863 #endif
11864 }
11865 
11895 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor,
11896  const uint16_t in_tensor_dim,
11897  const uint16_t in_tensor_ch,
11898  const q7_t *ker_weight,
11899  const uint16_t out_tensor_ch,
11900  const uint16_t ker_dim,
11901  const uint16_t pad,
11902  const uint16_t stride,
11903  const uint16_t pre_rshift,
11904  const uint16_t out_scale,
11905  const uint16_t post_rshift,
11906  q15_t *out_tensor,
11907  const uint16_t out_tensor_dim,
11908  q15_t *in_tmp_buf)
11909 {
11910 #if defined(__zcc__)
11911  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym(
11912  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11913  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11914  out_tensor_dim, in_tmp_buf);
11915 #else
11916  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym(
11917  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11918  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11919  out_tensor_dim, in_tmp_buf);
11920 #endif
11921 }
11922 
11959 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor,
11960  const uint16_t in_tensor_dim_x,
11961  const uint16_t in_tensor_dim_y,
11962  const uint16_t in_tensor_ch,
11963  const q7_t *ker_weight,
11964  const uint16_t out_tensor_ch,
11965  const uint16_t ker_dim_x,
11966  const uint16_t ker_dim_y,
11967  const uint16_t pad_x,
11968  const uint16_t pad_y,
11969  const uint16_t stride_x,
11970  const uint16_t stride_y,
11971  const q31_t *bias,
11972  const uint16_t pre_rshift,
11973  const uint16_t out_scale,
11974  const uint16_t post_rshift,
11975  q7_t *out_tensor,
11976  const uint16_t out_tensor_dim_x,
11977  const uint16_t out_tensor_dim_y,
11978  q15_t *in_tmp_buf)
11979 {
11980 #if defined(__zcc__)
11981  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11982  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11983  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11984  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11985  out_tensor_dim_y, in_tmp_buf);
11986 #else
11987  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11988  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11989  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11990  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11991  out_tensor_dim_y, in_tmp_buf);
11992 #endif
11993 }
11994 
12032 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor,
12033  const uint16_t in_tensor_dim_x,
12034  const uint16_t in_tensor_dim_y,
12035  const uint16_t in_tensor_ch,
12036  const q7_t *ker_weight,
12037  const uint16_t out_tensor_ch,
12038  const uint16_t ker_dim_x,
12039  const uint16_t ker_dim_y,
12040  const uint16_t pad_x,
12041  const uint16_t pad_y,
12042  const uint16_t stride_x,
12043  const uint16_t stride_y,
12044  const q31_t *bias,
12045  const uint16_t pre_rshift,
12046  const uint16_t out_scale,
12047  const uint16_t post_rshift,
12048  q15_t *out_tensor,
12049  const uint16_t out_tensor_dim_x,
12050  const uint16_t out_tensor_dim_y,
12051  q15_t *in_tmp_buf)
12052 {
12053 #if defined(__zcc__)
12054  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12055  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12056  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12057  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12058  out_tensor_dim_y, in_tmp_buf);
12059 #else
12060  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12061  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12062  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12063  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12064  out_tensor_dim_y, in_tmp_buf);
12065 #endif
12066 }
12067 
12104 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor,
12105  const uint16_t in_tensor_dim_x,
12106  const uint16_t in_tensor_dim_y,
12107  const uint16_t in_tensor_ch,
12108  const q7_t *ker_weight,
12109  const uint16_t out_tensor_ch,
12110  const uint16_t ker_dim_x,
12111  const uint16_t ker_dim_y,
12112  const uint16_t pad_x,
12113  const uint16_t pad_y,
12114  const uint16_t stride_x,
12115  const uint16_t stride_y,
12116  const q31_t *bias,
12117  const uint16_t pre_rshift,
12118  const uint16_t out_scale,
12119  const uint16_t post_rshift,
12120  u8_t *out_tensor,
12121  const uint16_t out_tensor_dim_x,
12122  const uint16_t out_tensor_dim_y,
12123  q15_t *in_tmp_buf)
12124 {
12125 #if defined(__zcc__)
12126  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12127  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12128  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12129  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12130  out_tensor_dim_y, in_tmp_buf);
12131 #else
12132  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12133  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12134  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12135  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12136  out_tensor_dim_y, in_tmp_buf);
12137 #endif
12138 }
12139 
12177 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor,
12178  const uint16_t in_tensor_dim_x,
12179  const uint16_t in_tensor_dim_y,
12180  const uint16_t in_tensor_ch,
12181  const q7_t *ker_weight,
12182  const uint16_t out_tensor_ch,
12183  const uint16_t ker_dim_x,
12184  const uint16_t ker_dim_y,
12185  const uint16_t pad_x,
12186  const uint16_t pad_y,
12187  const uint16_t stride_x,
12188  const uint16_t stride_y,
12189  const q31_t *bias,
12190  const uint16_t pre_rshift,
12191  const uint16_t out_scale,
12192  const uint16_t post_rshift,
12193  q7_t *out_tensor,
12194  const uint16_t out_tensor_dim_x,
12195  const uint16_t out_tensor_dim_y,
12196  q15_t *in_tmp_buf)
12197 {
12198 #if defined(__zcc__)
12199  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12200  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12201  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12202  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12203  out_tensor_dim_y, in_tmp_buf);
12204 #else
12205  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12206  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12207  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12208  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12209  out_tensor_dim_y, in_tmp_buf);
12210 #endif
12211 }
12212 
12250 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor,
12251  const uint16_t in_tensor_dim_x,
12252  const uint16_t in_tensor_dim_y,
12253  const uint16_t in_tensor_ch,
12254  const q7_t *ker_weight,
12255  const uint16_t out_tensor_ch,
12256  const uint16_t ker_dim_x,
12257  const uint16_t ker_dim_y,
12258  const uint16_t pad_x,
12259  const uint16_t pad_y,
12260  const uint16_t stride_x,
12261  const uint16_t stride_y,
12262  const q31_t *bias,
12263  const uint16_t pre_rshift,
12264  const uint16_t out_scale,
12265  const uint16_t post_rshift,
12266  q15_t *out_tensor,
12267  const uint16_t out_tensor_dim_x,
12268  const uint16_t out_tensor_dim_y,
12269  q15_t *in_tmp_buf)
12270 {
12271 #if defined(__zcc__)
12272  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12273  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12274  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12275  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12276  out_tensor_dim_y, in_tmp_buf);
12277 #else
12278  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12279  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12280  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12281  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12282  out_tensor_dim_y, in_tmp_buf);
12283 #endif
12284 }
12285 
12321 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor,
12322  const uint16_t in_tensor_dim_x,
12323  const uint16_t in_tensor_dim_y,
12324  const uint16_t in_tensor_ch,
12325  const q7_t *ker_weight,
12326  const uint16_t out_tensor_ch,
12327  const uint16_t ker_dim_x,
12328  const uint16_t ker_dim_y,
12329  const uint16_t pad_x,
12330  const uint16_t pad_y,
12331  const uint16_t stride_x,
12332  const uint16_t stride_y,
12333  const uint16_t pre_rshift,
12334  const uint16_t out_scale,
12335  const uint16_t post_rshift,
12336  q7_t *out_tensor,
12337  const uint16_t out_tensor_dim_x,
12338  const uint16_t out_tensor_dim_y,
12339  q15_t *in_tmp_buf)
12340 {
12341 #if defined(__zcc__)
12342  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12343  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12344  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12345  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12346  out_tensor_dim_y, in_tmp_buf);
12347 #else
12348  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12349  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12350  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12351  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12352  out_tensor_dim_y, in_tmp_buf);
12353 #endif
12354 }
12355 
12392 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor,
12393  const uint16_t in_tensor_dim_x,
12394  const uint16_t in_tensor_dim_y,
12395  const uint16_t in_tensor_ch,
12396  const q7_t *ker_weight,
12397  const uint16_t out_tensor_ch,
12398  const uint16_t ker_dim_x,
12399  const uint16_t ker_dim_y,
12400  const uint16_t pad_x,
12401  const uint16_t pad_y,
12402  const uint16_t stride_x,
12403  const uint16_t stride_y,
12404  const uint16_t pre_rshift,
12405  const uint16_t out_scale,
12406  const uint16_t post_rshift,
12407  q15_t *out_tensor,
12408  const uint16_t out_tensor_dim_x,
12409  const uint16_t out_tensor_dim_y,
12410  q15_t *in_tmp_buf)
12411 {
12412 #if defined(__zcc__)
12413  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12414  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12415  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12416  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12417  out_tensor_dim_y, in_tmp_buf);
12418 #else
12419  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12420  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12421  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12422  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12423  out_tensor_dim_y, in_tmp_buf);
12424 #endif
12425 }
12426 
12462 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor,
12463  const uint16_t in_tensor_dim_x,
12464  const uint16_t in_tensor_dim_y,
12465  const uint16_t in_tensor_ch,
12466  const q7_t *ker_weight,
12467  const uint16_t out_tensor_ch,
12468  const uint16_t ker_dim_x,
12469  const uint16_t ker_dim_y,
12470  const uint16_t pad_x,
12471  const uint16_t pad_y,
12472  const uint16_t stride_x,
12473  const uint16_t stride_y,
12474  const uint16_t pre_rshift,
12475  const uint16_t out_scale,
12476  const uint16_t post_rshift,
12477  u8_t *out_tensor,
12478  const uint16_t out_tensor_dim_x,
12479  const uint16_t out_tensor_dim_y,
12480  q15_t *in_tmp_buf)
12481 {
12482 #if defined(__zcc__)
12483  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12484  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12485  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12486  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12487  out_tensor_dim_y, in_tmp_buf);
12488 #else
12489  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12490  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12491  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12492  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12493  out_tensor_dim_y, in_tmp_buf);
12494 #endif
12495 }
12496 
12533 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor,
12534  const uint16_t in_tensor_dim_x,
12535  const uint16_t in_tensor_dim_y,
12536  const uint16_t in_tensor_ch,
12537  const q7_t *ker_weight,
12538  const uint16_t out_tensor_ch,
12539  const uint16_t ker_dim_x,
12540  const uint16_t ker_dim_y,
12541  const uint16_t pad_x,
12542  const uint16_t pad_y,
12543  const uint16_t stride_x,
12544  const uint16_t stride_y,
12545  const uint16_t pre_rshift,
12546  const uint16_t out_scale,
12547  const uint16_t post_rshift,
12548  q7_t *out_tensor,
12549  const uint16_t out_tensor_dim_x,
12550  const uint16_t out_tensor_dim_y,
12551  q15_t *in_tmp_buf)
12552 {
12553 #if defined(__zcc__)
12554  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12555  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12556  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12557  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12558  out_tensor_dim_y, in_tmp_buf);
12559 #else
12560  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12561  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12562  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12563  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12564  out_tensor_dim_y, in_tmp_buf);
12565 #endif
12566 }
12567 
12604 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor,
12605  const uint16_t in_tensor_dim_x,
12606  const uint16_t in_tensor_dim_y,
12607  const uint16_t in_tensor_ch,
12608  const q7_t *ker_weight,
12609  const uint16_t out_tensor_ch,
12610  const uint16_t ker_dim_x,
12611  const uint16_t ker_dim_y,
12612  const uint16_t pad_x,
12613  const uint16_t pad_y,
12614  const uint16_t stride_x,
12615  const uint16_t stride_y,
12616  const uint16_t pre_rshift,
12617  const uint16_t out_scale,
12618  const uint16_t post_rshift,
12619  q15_t *out_tensor,
12620  const uint16_t out_tensor_dim_x,
12621  const uint16_t out_tensor_dim_y,
12622  q15_t *in_tmp_buf)
12623 {
12624 #if defined(__zcc__)
12625  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12626  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12627  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12628  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12629  out_tensor_dim_y, in_tmp_buf);
12630 #else
12631  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12632  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12633  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12634  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12635  out_tensor_dim_y, in_tmp_buf);
12636 #endif
12637 }
12638 
12685 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
12686  const uint16_t in_tensor_dim_x,
12687  const uint16_t in_tensor_dim_y,
12688  const uint16_t in_tensor_ch,
12689  const uint16_t in_tensor_group,
12690  const q7_t *ker_weight,
12691  const uint16_t out_tensor_ch,
12692  const uint16_t pad_x,
12693  const uint16_t pad_y,
12694  const uint16_t stride_x,
12695  const uint16_t stride_y,
12696  const int32_t *bias,
12697  q7_t *out_tensor,
12698  const int32_t *out_shift,
12699  const int32_t *out_scale,
12700  const int32_t out_offset,
12701  const int32_t in_offset,
12702  const int32_t act_min,
12703  const int32_t act_max,
12704  const uint16_t out_tensor_dim_x,
12705  const uint16_t out_tensor_dim_y,
12706  q15_t *tmp_buf)
12707 {
12708 #if defined(__zcc__)
12709 
12710  tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
12711  stride_y, pad_x, pad_y, act_min, act_max};
12712 
12713  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12714 
12715  tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12716  in_tensor_group, out_tensor_ch};
12717 
12718  return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12719  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
12720 
12721 #else
12722  return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
12723  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12724  in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
12725  stride_y, bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
12726  act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
12727 #endif
12728 }
12729 
/**
 * @brief Buffer-size query paired with the 1x1 s8 asymmetric convolution.
 *
 * Forwards in_tensor_ch to
 * tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size() when __zcc__ is
 * defined, otherwise to
 * riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size().
 * NOTE(review): presumably the result is the size required for the
 * tmp_buf argument of the matching convolution wrapper; units are defined
 * by the backend library - confirm there.
 *
 * @param in_tensor_ch  Passed unchanged to the backend query.
 * @return The value reported by the selected backend routine.
 */
static inline int32_t
hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch)
{
#if defined(__zcc__)
    return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
        in_tensor_ch);
#else
    return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch);
#endif
}
12747 
12786 static inline int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
12787  const uint16_t in_tensor_dim_x,
12788  const uint16_t in_tensor_ch,
12789  const uint16_t in_tensor_group,
12790  const q7_t *ker_weight,
12791  const uint16_t out_tensor_ch,
12792  const uint16_t ker_dim_x,
12793  const uint16_t pad_x,
12794  const uint16_t stride_x,
12795  const int32_t *bias,
12796  q7_t *out_tensor,
12797  const int32_t *out_shift,
12798  const int32_t *out_scale,
12799  const int32_t out_offset,
12800  const int32_t in_offset,
12801  const int32_t act_min,
12802  const int32_t act_max,
12803  const uint16_t out_tensor_dim_x,
12804  q15_t *in_tmp_buf)
12805 {
12806 #if defined(__zcc__)
12807 
12808  tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
12809  act_min, act_max};
12810 
12811  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12812 
12813  tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
12814  ker_dim_x, out_tensor_dim_x, out_tensor_ch};
12815 
12816  return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12817  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
12818 
12819 #else
12820  return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
12821  in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
12822  out_tensor_ch, ker_dim_x, pad_x, stride_x, bias, out_tensor, out_shift,
12823  out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12824  in_tmp_buf);
12825 #endif
12826 }
12827 
/**
 * @brief Buffer-size query paired with the 1xn s8 asymmetric convolution.
 *
 * Forwards its three arguments unchanged to
 * tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size() when __zcc__
 * is defined, otherwise to
 * riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size().
 * NOTE(review): units of the result are defined by the backend library.
 *
 * @return The value reported by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t size;

#ifdef __zcc__
    size = tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    size = riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif
    return size;
}
12849 
12891 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
12892  const uint16_t in_tensor_dim_x,
12893  const uint16_t in_tensor_dim_y,
12894  const uint16_t in_tensor_ch,
12895  const uint16_t in_tensor_group,
12896  const q7_t *ker_weight,
12897  const uint16_t out_tensor_ch,
12898  const uint16_t ker_dim_x,
12899  const uint16_t ker_dim_y,
12900  const uint16_t pad_x,
12901  const uint16_t pad_y,
12902  const uint16_t stride_x,
12903  const uint16_t stride_y,
12904  const int32_t *bias,
12905  q7_t *out_tensor,
12906  const int32_t *out_shift,
12907  const int32_t *out_scale,
12908  const int32_t out_offset,
12909  const int32_t in_offset,
12910  const int32_t act_min,
12911  const int32_t act_max,
12912  const uint16_t out_tensor_dim_x,
12913  const uint16_t out_tensor_dim_y,
12914  q15_t *in_tmp_buf)
12915 {
12916 #if defined(__zcc__)
12917 
12918  tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
12919  in_offset, out_offset, act_min, act_max};
12920 
12921  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12922 
12923  tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12924  in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
12925  out_tensor_ch};
12926 
12927  return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12928  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
12929 
12930 #else
12931  return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
12932  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12933  in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
12934  pad_y, stride_x, stride_y, bias, out_tensor, out_shift, out_scale,
12935  out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12936  out_tensor_dim_y, in_tmp_buf);
12937 #endif
12938 }
12939 
/**
 * @brief Buffer-size query paired with the s8 asymmetric convolution.
 *
 * Forwards its three arguments unchanged to
 * tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() when __zcc__
 * is defined, otherwise to
 * riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size().
 * NOTE(review): units of the result are defined by the backend library.
 *
 * @return The value reported by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t size;

#ifdef __zcc__
    size = tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    size = riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif
    return size;
}
12960 
/**
 * @brief Compiler-selected forwarder for conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any.
 *
 * Every argument is handed on unchanged, in declaration order, to
 * tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any() when __zcc__ is defined
 * and to riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any() otherwise.
 * This wrapper uses plain int8_t/int16_t/int32_t types and takes no
 * kernel-dimension arguments.
 * NOTE(review): parameter semantics are defined by the backend library.
 *
 * @return The value produced by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
    const int8_t *in_tensor,
    const int32_t in_tensor_dim_x,
    const int32_t in_tensor_dim_y,
    const int32_t in_tensor_ch,
    const int8_t *ker_weight,
    const int32_t out_tensor_ch,
    const int32_t pad_x,
    const int32_t pad_y,
    const int32_t stride_x,
    const int32_t stride_y,
    const int32_t *bias,
    int8_t *out_tensor,
    const int32_t *out_shift,
    const int32_t *out_scale,
    const int32_t out_tensor_dim_x,
    const int32_t out_tensor_dim_y,
    const int32_t out_offset,
    const int32_t in_offset,
    const int32_t act_min,
    const int32_t act_max,
    const int32_t dilation_x,
    const int32_t dilation_y,
    int16_t *tmp_buf)
{
    int32_t ret;

#ifdef __zcc__
    ret = tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, out_tensor_ch,
        pad_x, pad_y, stride_x, stride_y,
        bias, out_tensor, out_shift, out_scale,
        out_tensor_dim_x, out_tensor_dim_y,
        out_offset, in_offset, act_min, act_max,
        dilation_x, dilation_y, tmp_buf);
#else
    ret = riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, out_tensor_ch,
        pad_x, pad_y, stride_x, stride_y,
        bias, out_tensor, out_shift, out_scale,
        out_tensor_dim_x, out_tensor_dim_y,
        out_offset, in_offset, act_min, act_max,
        dilation_x, dilation_y, tmp_buf);
#endif
    return ret;
}
13038 
13086 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13087  const uint16_t in_tensor_dim_x,
13088  const uint16_t in_tensor_dim_y,
13089  const uint16_t in_tensor_ch,
13090  const q7_t *ker_weight,
13091  const uint16_t out_tensor_ch,
13092  const uint16_t ch_mult,
13093  const uint16_t ker_dim_x,
13094  const uint16_t ker_dim_y,
13095  const uint16_t pad_x,
13096  const uint16_t pad_y,
13097  const uint16_t stride_x,
13098  const uint16_t stride_y,
13099  const int32_t *bias,
13100  q7_t *out_tensor,
13101  const int32_t *out_shift,
13102  const int32_t *out_scale,
13103  const uint16_t out_tensor_dim_x,
13104  const uint16_t out_tensor_dim_y,
13105  const int32_t out_offset,
13106  const int32_t in_offset,
13107  const int32_t act_min,
13108  const int32_t act_max,
13109  const uint16_t dilation_x,
13110  const uint16_t dilation_y,
13111  q15_t *tmp_buf)
13112 {
13113 #if defined(__zcc__)
13114 
13115  tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13116  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13117 
13118  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13119 
13120  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13121  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13122 
13123  return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13124  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13125 
13126 
13127 #else
13128  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13129  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13130  out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13131  stride_y, bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13132  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13133  dilation_y, tmp_buf);
13134 #endif
13135 }
13136 
13181 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
13182  const uint16_t in_tensor_dim_x,
13183  const uint16_t in_tensor_dim_y,
13184  const uint16_t in_tensor_ch,
13185  const q7_t *ker_weight,
13186  const uint16_t out_tensor_ch,
13187  const uint16_t ker_dim_x,
13188  const uint16_t ker_dim_y,
13189  const uint16_t pad_x,
13190  const uint16_t pad_y,
13191  const uint16_t stride_x,
13192  const uint16_t stride_y,
13193  const int32_t *bias,
13194  q7_t *out_tensor,
13195  const int32_t *out_shift,
13196  const int32_t *out_scale,
13197  const uint16_t out_tensor_dim_x,
13198  const uint16_t out_tensor_dim_y,
13199  const int32_t out_offset,
13200  const int32_t in_offset,
13201  const int32_t act_min,
13202  const int32_t act_max,
13203  const uint16_t dilation_x,
13204  const uint16_t dilation_y,
13205  q15_t *in_tmp_buf)
13206 {
13207 #if defined(__zcc__)
13208 
13209  tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13210  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13211 
13212  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13213 
13214  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13215  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13216 
13217  return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13218  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13219 
13220 #else
13221  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13222  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13223  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13224  bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13225  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13226  dilation_y, in_tmp_buf);
13227 #endif
13228 }
13229 
/**
 * @brief Buffer-size query paired with the "fast" s8 asymmetric depthwise
 *        convolution.
 *
 * Forwards its three arguments unchanged to
 * tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size() when
 * __zcc__ is defined, otherwise to
 * riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size().
 * NOTE(review): units of the result are defined by the backend library.
 *
 * @return The value reported by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t size;

#ifdef __zcc__
    size = tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    size = riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif
    return size;
}
13250 
/**
 * @brief Compiler-selected forwarder for conv_dw_HWC_u8_u8_u8_asym_bias_any.
 *
 * Every argument is handed on unchanged, in declaration order, to
 * tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any() when __zcc__ is defined and
 * to riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any() otherwise. Here
 * out_shift and out_scale are scalars and no scratch buffer is taken.
 * NOTE(review): parameter semantics are defined by the backend library.
 *
 * @return The value produced by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
    const uint8_t *in_tensor,
    const uint16_t in_tensor_dim_x,
    const uint16_t in_tensor_dim_y,
    const uint16_t in_tensor_ch,
    const uint8_t *ker_weight,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y,
    const int16_t ch_mult,
    const int16_t pad_x,
    const int16_t pad_y,
    const int16_t stride_x,
    const int16_t stride_y,
    const int16_t dilation_x,
    const int16_t dilation_y,
    const int32_t *bias,
    const int32_t in_offset,
    const int32_t ker_offset,
    const int32_t out_offset,
    uint8_t *out_tensor,
    const uint16_t out_tensor_dim_x,
    const uint16_t out_tensor_dim_y,
    const int32_t act_min,
    const int32_t act_max,
    const int32_t out_shift,
    const int32_t out_scale)
{
    int32_t ret;

#ifdef __zcc__
    ret = tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, ker_dim_x, ker_dim_y, ch_mult,
        pad_x, pad_y, stride_x, stride_y, dilation_x, dilation_y,
        bias, in_offset, ker_offset, out_offset,
        out_tensor, out_tensor_dim_x, out_tensor_dim_y,
        act_min, act_max, out_shift, out_scale);
#else
    ret = riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, ker_dim_x, ker_dim_y, ch_mult,
        pad_x, pad_y, stride_x, stride_y, dilation_x, dilation_y,
        bias, in_offset, ker_offset, out_offset,
        out_tensor, out_tensor_dim_x, out_tensor_dim_y,
        act_min, act_max, out_shift, out_scale);
#endif
    return ret;
}
13333 
13334 #ifdef __riscv_zfh
13370 static inline int32_t hpm_nn_conv_1x1_HWC_f16_f16_f16_bias_any(const float16_t *in_tensor,
13371  const uint16_t in_tensor_dim_x,
13372  const uint16_t in_tensor_dim_y,
13373  const uint16_t in_tensor_ch,
13374  const float16_t *ker_weight,
13375  const uint16_t out_tensor_ch,
13376  const uint16_t ker_dim_x,
13377  const uint16_t ker_dim_y,
13378  const uint16_t pad_x,
13379  const uint16_t pad_y,
13380  const uint16_t stride_x,
13381  const uint16_t stride_y,
13382  const float16_t *bias,
13383  float16_t *out_tensor,
13384  const uint16_t out_tensor_dim_x,
13385  const uint16_t out_tensor_dim_y,
13386  float16_t *in_tmp_buf,
13387  float16_t *tmp_buf)
13388 {
13389 #if defined(__zcc__)
13390  return tpt_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13391  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13392  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13393  bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y, in_tmp_buf,
13394  tmp_buf);
13395 #else
13396  return riscv_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13397  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13398  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13399  bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y, in_tmp_buf,
13400  tmp_buf);
13401 #endif
13402 }
13403 
13425 static inline int32_t hpm_nn_conv_HWC_f16_f16_f16_bias(const float16_t *in_tensor,
13426  const uint16_t in_tensor_dim,
13427  const uint16_t in_tensor_ch,
13428  const float16_t *ker_weight,
13429  const uint16_t out_tensor_ch,
13430  const uint16_t ker_dim,
13431  const uint16_t pad,
13432  const uint16_t stride,
13433  const float16_t *bias,
13434  float16_t *out_tensor,
13435  const uint16_t out_tensor_dim,
13436  float16_t *in_tmp_buf,
13437  float16_t *tmp_buf)
13438 {
13439 #if defined(__zcc__)
13440  return tpt_nn_conv_HWC_f16_f16_f16_bias(
13441  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13442  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13443  tmp_buf);
13444 #else
13445  return riscv_nn_conv_HWC_f16_f16_f16_bias(
13446  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13447  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13448  tmp_buf);
13449 #endif
13450 }
13451 
13473 static inline int32_t hpm_nn_conv_dw_HWC_f16_f16_f16_bias(const float16_t *in_tensor,
13474  const uint16_t in_tensor_dim,
13475  const uint16_t in_tensor_ch,
13476  const float16_t *ker_weight,
13477  const uint16_t out_tensor_ch,
13478  const uint16_t ker_dim,
13479  const uint16_t pad,
13480  const uint16_t stride,
13481  const float16_t *bias,
13482  float16_t *out_tensor,
13483  const uint16_t out_tensor_dim,
13484  float16_t *in_tmp_buf,
13485  float16_t *tmp_buf)
13486 {
13487 #if defined(__zcc__)
13488  return tpt_nn_conv_dw_HWC_f16_f16_f16_bias(
13489  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13490  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13491  tmp_buf);
13492 #else
13493  return riscv_nn_conv_dw_HWC_f16_f16_f16_bias(
13494  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13495  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13496  tmp_buf);
13497 #endif
13498 }
13499 #endif
13500 
13505 #endif
13506 
13507 #ifdef HPM_EN_MATH_NN_RVP32_LIB
13508 #if defined(__zcc__)
13509 #include "tpt_nn_convolution.h"
13510 #else
13511 #include "riscv_nn_convolution.h"
13512 #endif
13513 
13555 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13556  const uint16_t in_tensor_dim_x,
13557  const uint16_t in_tensor_dim_y,
13558  const uint16_t in_tensor_ch,
13559  const uint16_t in_tensor_group,
13560  const q7_t *ker_weight,
13561  const uint16_t out_tensor_ch,
13562  const uint16_t ker_dim_x,
13563  const uint16_t ker_dim_y,
13564  const uint16_t pad_x,
13565  const uint16_t pad_y,
13566  const uint16_t stride_x,
13567  const uint16_t stride_y,
13568  const int32_t *bias,
13569  q7_t *out_tensor,
13570  const int32_t *out_shift,
13571  const int32_t *out_scale,
13572  const int32_t out_offset,
13573  const int32_t in_offset,
13574  const int32_t act_min,
13575  const int32_t act_max,
13576  const uint16_t out_tensor_dim_x,
13577  const uint16_t out_tensor_dim_y,
13578  q15_t *in_tmp_buf)
13579 {
13580 #if defined(__zcc__)
13581 
13582  tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
13583  in_offset, out_offset, act_min, act_max};
13584 
13585  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13586 
13587  tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13588  in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
13589  out_tensor_ch};
13590 
13591  return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13592  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13593 
13594 #else
13595  return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
13596  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13597  in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
13598  pad_y, stride_x, stride_y, bias, out_tensor, out_shift, out_scale,
13599  out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13600  out_tensor_dim_y, in_tmp_buf);
13601 #endif
13602 }
13603 
13650 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
13651  const uint16_t in_tensor_dim_x,
13652  const uint16_t in_tensor_dim_y,
13653  const uint16_t in_tensor_ch,
13654  const uint16_t in_tensor_group,
13655  const q7_t *ker_weight,
13656  const uint16_t out_tensor_ch,
13657  const uint16_t pad_x,
13658  const uint16_t pad_y,
13659  const uint16_t stride_x,
13660  const uint16_t stride_y,
13661  const int32_t *bias,
13662  q7_t *out_tensor,
13663  const int32_t *out_shift,
13664  const int32_t *out_scale,
13665  const int32_t out_offset,
13666  const int32_t in_offset,
13667  const int32_t act_min,
13668  const int32_t act_max,
13669  const uint16_t out_tensor_dim_x,
13670  const uint16_t out_tensor_dim_y,
13671  q15_t *tmp_buf)
13672 {
13673 #if defined(__zcc__)
13674 
13675  tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
13676  stride_y, pad_x, pad_y, act_min, act_max};
13677 
13678  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13679 
13680  tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13681  in_tensor_group, out_tensor_ch};
13682 
13683  return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13684  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13685 
13686 #else
13687  return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
13688  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13689  in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
13690  stride_y, bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
13691  act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
13692 #endif
13693 }
13694 
13742 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13743  const uint16_t in_tensor_dim_x,
13744  const uint16_t in_tensor_dim_y,
13745  const uint16_t in_tensor_ch,
13746  const q7_t *ker_weight,
13747  const uint16_t out_tensor_ch,
13748  const uint16_t ch_mult,
13749  const uint16_t ker_dim_x,
13750  const uint16_t ker_dim_y,
13751  const uint16_t pad_x,
13752  const uint16_t pad_y,
13753  const uint16_t stride_x,
13754  const uint16_t stride_y,
13755  const int32_t *bias,
13756  q7_t *out_tensor,
13757  const int32_t *out_shift,
13758  const int32_t *out_scale,
13759  const uint16_t out_tensor_dim_x,
13760  const uint16_t out_tensor_dim_y,
13761  const int32_t out_offset,
13762  const int32_t in_offset,
13763  const int32_t act_min,
13764  const int32_t act_max,
13765  const uint16_t dilation_x,
13766  const uint16_t dilation_y,
13767  q15_t *tmp_buf)
13768 {
13769 #if defined(__zcc__)
13770 
13771  tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13772  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13773 
13774  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13775 
13776  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13777  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13778 
13779  return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13780  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13781 
13782 #else
13783  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13784  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13785  out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13786  stride_y, bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13787  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13788  dilation_y, tmp_buf);
13789 #endif
13790 }
13791 
13831 static inline int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13832  const uint16_t in_tensor_dim_x,
13833  const uint16_t in_tensor_ch,
13834  const uint16_t in_tensor_group,
13835  const q7_t *ker_weight,
13836  const uint16_t out_tensor_ch,
13837  const uint16_t ker_dim_x,
13838  const uint16_t pad_x,
13839  const uint16_t stride_x,
13840  const int32_t *bias,
13841  q7_t *out_tensor,
13842  const int32_t *out_shift,
13843  const int32_t *out_scale,
13844  const int32_t out_offset,
13845  const int32_t in_offset,
13846  const int32_t act_min,
13847  const int32_t act_max,
13848  const uint16_t out_tensor_dim_x,
13849  q15_t *in_tmp_buf)
13850 {
13851 #if defined(__zcc__)
13852 
13853  tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
13854  act_min, act_max};
13855 
13856  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13857 
13858  tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
13859  ker_dim_x, out_tensor_dim_x, out_tensor_ch};
13860 
13861  return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13862  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13863 
13864 #else
13865  return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
13866  in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
13867  out_tensor_ch, ker_dim_x, pad_x, stride_x, bias, out_tensor, out_shift,
13868  out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13869  in_tmp_buf);
13870 #endif
13871 }
13872 
13917 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
13918  const uint16_t in_tensor_dim_x,
13919  const uint16_t in_tensor_dim_y,
13920  const uint16_t in_tensor_ch,
13921  const q7_t *ker_weight,
13922  const uint16_t out_tensor_ch,
13923  const uint16_t ker_dim_x,
13924  const uint16_t ker_dim_y,
13925  const uint16_t pad_x,
13926  const uint16_t pad_y,
13927  const uint16_t stride_x,
13928  const uint16_t stride_y,
13929  const int32_t *bias,
13930  q7_t *out_tensor,
13931  const int32_t *out_shift,
13932  const int32_t *out_scale,
13933  const uint16_t out_tensor_dim_x,
13934  const uint16_t out_tensor_dim_y,
13935  const int32_t out_offset,
13936  const int32_t in_offset,
13937  const int32_t act_min,
13938  const int32_t act_max,
13939  const uint16_t dilation_x,
13940  const uint16_t dilation_y,
13941  q15_t *in_tmp_buf)
13942 {
13943 #if defined(__zcc__)
13944 
13945  tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13946  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13947 
13948  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13949 
13950  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13951  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13952 
13953  return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13954  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13955 
13956 #else
13957  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13958  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13959  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13960  bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13961  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13962  dilation_y, in_tmp_buf);
13963 #endif
13964 }
13965 
/**
 * @brief Query the backend's buffer-size helper for the 1x1 asym-bias fast_any convolution.
 *
 * Calls riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size()
 * unless __zcc__ is defined, in which case
 * tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size() is used.
 *
 * @param in_tensor_ch forwarded unchanged to the backend helper
 * @return the value returned by the selected backend helper
 */
static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch)
{
#if !defined(__zcc__)
    return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(in_tensor_ch);
#else
    return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(in_tensor_ch);
#endif
}
13982 
/**
 * @brief Query the backend's buffer-size helper for the depthwise asym-bias fast_any convolution.
 *
 * Calls riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size()
 * unless __zcc__ is defined, in which case
 * tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size() is used.
 * All three arguments are forwarded unchanged.
 *
 * @return the value returned by the selected backend helper
 */
static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
{
#if !defined(__zcc__)
    return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif
}
14003 
/**
 * @brief Query the backend's buffer-size helper for the 1xn asym-bias convolution.
 *
 * Calls riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() unless
 * __zcc__ is defined, in which case
 * tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size() is used.
 * All three arguments are forwarded unchanged.
 *
 * @return the value returned by the selected backend helper
 */
static inline int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
{
#if !defined(__zcc__)
    return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    return tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif
}
14025 
/**
 * @brief Query the backend's buffer-size helper for the asym-bias `any` convolution.
 *
 * Calls riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() unless
 * __zcc__ is defined, in which case
 * tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() is used.
 * All three arguments are forwarded unchanged.
 *
 * @return the value returned by the selected backend helper
 */
static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
{
#if !defined(__zcc__)
    return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    return tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif
}
14047 
14048 #endif
14049 
14050 #endif
14051 
14052 #ifdef HPM_MATH_NN_CONNECTED
14053 #ifdef HPM_EN_MATH_NN_LIB
14054 #if defined(__zcc__)
14055 #include "tpt_nn_fully_connected.h"
14056 #else
14057 #include "riscv_nn_fully_connected.h"
14058 #endif
14059 
14103 static inline int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec,
14104  const q7_t *wt_mat,
14105  const uint16_t size,
14106  const uint16_t wt_row_num,
14107  const uint16_t bias_lshift,
14108  const uint16_t out_rshift,
14109  const q7_t *bias,
14110  q7_t *out_vec,
14111  q15_t *in_tmp_buf)
14112 #if defined(__zcc__)
14113  return tpt_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
14115  in_tmp_buf);
14116 #else
14117  return riscv_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
14120 #endif
14121 }
14122 
14144 static inline int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec,
14145  const q7_t *wt_mat,
14146  const uint16_t size,
14147  const uint16_t wt_row_num,
14148  const uint16_t bias_lshift,
14149  const uint16_t out_rshift,
14150  const q7_t *bias,
14151  q7_t *out_vec,
14152  q15_t *in_tmp_buf)
14153 {
14154 #if defined(__zcc__)
14155  return tpt_nn_fc_s8_s8_s8_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14157  out_vec, in_tmp_buf);
14158 #else
14159  return riscv_nn_fc_s8_s8_s8_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14161  out_vec, in_tmp_buf);
14162 #endif
14163 }
14164 
14179 static inline int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec,
14180  const q15_t *wt_mat,
14181  const uint16_t size,
14182  const uint16_t wt_row_num,
14183  const uint16_t bias_lshift,
14184  const uint16_t out_rshift,
14185  const q15_t *bias,
14186  q15_t *out_vec,
14187  q15_t *tmp_buf)
14188 {
14189 #if defined(__zcc__)
14190  return tpt_nn_fc_s16_s16_s16_sft_bias(in_vec, wt_mat, size, wt_row_num,
14192  tmp_buf);
14193 #else
14194  return riscv_nn_fc_s16_s16_s16_sft_bias(in_vec, wt_mat, size, wt_row_num,
14196  out_vec, tmp_buf);
14197 #endif
14198 }
14199 
14222 static inline int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec,
14223  const q15_t *wt_mat,
14224  const uint16_t size,
14225  const uint16_t wt_row_num,
14226  const uint16_t bias_lshift,
14227  const uint16_t out_rshift,
14228  const q15_t *bias,
14229  q15_t *out_vec,
14230  q15_t *in_tmp_buf)
14231 {
14232 #if defined(__zcc__)
14233  return tpt_nn_fc_s16_s16_s16_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14235  out_vec, in_tmp_buf);
14236 #else
14237  return riscv_nn_fc_s16_s16_s16_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14239  out_vec, in_tmp_buf);
14240 #endif
14241 }
14242 
14258 static inline int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec,
14259  const q7_t *wt_mat,
14260  const uint16_t size,
14261  const uint16_t wt_row_num,
14262  const uint16_t bias_lshift,
14263  const uint16_t out_rshift,
14264  const q7_t *bias,
14265  q15_t *out_vec,
14266  q15_t *tmp_buf)
14267 {
14268 #if defined(__zcc__)
14269  return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
14271  out_vec, tmp_buf);
14272 #else
14273  return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias(
14275  tmp_buf);
14276 #endif
14277 }
14278 
14300 static inline int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec,
14301  const q7_t *wt_mat,
14302  const uint16_t size,
14303  const uint16_t wt_row_num,
14304  const uint16_t bias_lshift,
14305  const uint16_t out_rshift,
14306  const q7_t *bias,
14307  q15_t *out_vec,
14308  q15_t *tmp_buf)
14309 {
14310 #if defined(__zcc__)
14311  return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14313  tmp_buf);
14314 #else
14315  return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14317  tmp_buf);
14318 #endif
14319 }
14320 
14345 static inline int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec,
14346  const q7_t *wt_mat,
14347  const uint16_t size,
14348  const uint16_t wt_row_num,
14349  const uint16_t pre_rshift,
14350  const uint16_t out_scale,
14351  const uint16_t post_rshift,
14352  const q31_t *bias,
14353  q7_t *out_vec,
14354  q15_t *in_tmp_buf)
14355 {
14356 #if defined(__zcc__)
14357  return tpt_nn_fc_s8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14358  pre_rshift, out_scale, post_rshift, bias,
14359  out_vec, in_tmp_buf);
14360 #else
14361  return riscv_nn_fc_s8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14362  pre_rshift, out_scale, post_rshift, bias,
14363  out_vec, in_tmp_buf);
14364 #endif
14365 }
14366 
14391 static inline int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec,
14392  const q7_t *wt_mat,
14393  const uint16_t size,
14394  const uint16_t wt_row_num,
14395  const uint16_t pre_rshift,
14396  const uint16_t out_scale,
14397  const uint16_t post_rshift,
14398  const q31_t *bias,
14399  q15_t *out_vec,
14400  q15_t *in_tmp_buf)
14401 {
14402 #if defined(__zcc__)
14403  return tpt_nn_fc_s8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14404  pre_rshift, out_scale, post_rshift, bias,
14405  out_vec, in_tmp_buf);
14406 #else
14407  return riscv_nn_fc_s8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14408  pre_rshift, out_scale, post_rshift,
14409  bias, out_vec, in_tmp_buf);
14410 #endif
14411 }
14412 
14437 static inline int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec,
14438  const q7_t *wt_mat,
14439  const uint16_t size,
14440  const uint16_t wt_row_num,
14441  const uint16_t pre_rshift,
14442  const uint16_t out_scale,
14443  const uint16_t post_rshift,
14444  const q31_t *bias,
14445  u8_t *out_vec,
14446  q15_t *in_tmp_buf)
14447 {
14448 #if defined(__zcc__)
14449  return tpt_nn_fc_u8_u8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14450  pre_rshift, out_scale, post_rshift, bias,
14451  out_vec, in_tmp_buf);
14452 #else
14453  return riscv_nn_fc_u8_u8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14454  pre_rshift, out_scale, post_rshift, bias,
14455  out_vec, in_tmp_buf);
14456 #endif
14457 }
14458 
14483 static inline int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec,
14484  const q7_t *wt_mat,
14485  const uint16_t size,
14486  const uint16_t wt_row_num,
14487  const uint16_t pre_rshift,
14488  const uint16_t out_scale,
14489  const uint16_t post_rshift,
14490  const q31_t *bias,
14491  q7_t *out_vec,
14492  q15_t *in_tmp_buf)
14493 {
14494 #if defined(__zcc__)
14495  return tpt_nn_fc_u8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14496  pre_rshift, out_scale, post_rshift, bias,
14497  out_vec, in_tmp_buf);
14498 #else
14499  return riscv_nn_fc_u8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14500  pre_rshift, out_scale, post_rshift, bias,
14501  out_vec, in_tmp_buf);
14502 #endif
14503 }
14504 
14529 static inline int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec,
14530  const q7_t *wt_mat,
14531  const uint16_t size,
14532  const uint16_t wt_row_num,
14533  const uint16_t pre_rshift,
14534  const uint16_t out_scale,
14535  const uint16_t post_rshift,
14536  const q31_t *bias,
14537  q15_t *out_vec,
14538  q15_t *in_tmp_buf)
14539 {
14540 #if defined(__zcc__)
14541  return tpt_nn_fc_u8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14542  pre_rshift, out_scale, post_rshift, bias,
14543  out_vec, in_tmp_buf);
14544 #else
14545  return riscv_nn_fc_u8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14546  pre_rshift, out_scale, post_rshift,
14547  bias, out_vec, in_tmp_buf);
14548 #endif
14549 }
14550 
14574 static inline int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec,
14575  const q7_t *wt_mat,
14576  const uint16_t size,
14577  const uint16_t wt_row_num,
14578  const uint16_t pre_rshift,
14579  const uint16_t out_scale,
14580  const uint16_t post_rshift,
14581  q7_t *out_vec,
14582  q15_t *in_tmp_buf)
14583 {
14584 #if defined(__zcc__)
14585  return tpt_nn_fc_s8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14586  out_scale, post_rshift, out_vec, in_tmp_buf);
14587 #else
14588  return riscv_nn_fc_s8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14589  out_scale, post_rshift, out_vec, in_tmp_buf);
14590 #endif
14591 }
14592 
14616 static inline int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec,
14617  const q7_t *wt_mat,
14618  const uint16_t size,
14619  const uint16_t wt_row_num,
14620  const uint16_t pre_rshift,
14621  const uint16_t out_scale,
14622  const uint16_t post_rshift,
14623  q15_t *out_vec,
14624  q15_t *in_tmp_buf)
14625 {
14626 #if defined(__zcc__)
14627  return tpt_nn_fc_s8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14628  out_scale, post_rshift, out_vec, in_tmp_buf);
14629 #else
14630  return riscv_nn_fc_s8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14631  out_scale, post_rshift, out_vec, in_tmp_buf);
14632 #endif
14633 }
14634 
14658 static inline int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec,
14659  const q7_t *wt_mat,
14660  const uint16_t size,
14661  const uint16_t wt_row_num,
14662  const uint16_t pre_rshift,
14663  const uint16_t out_scale,
14664  const uint16_t post_rshift,
14665  u8_t *out_vec,
14666  q15_t *in_tmp_buf)
14667 {
14668 #if defined(__zcc__)
14669  return tpt_nn_fc_u8_u8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14670  out_scale, post_rshift, out_vec, in_tmp_buf);
14671 #else
14672  return riscv_nn_fc_u8_u8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14673  out_scale, post_rshift, out_vec, in_tmp_buf);
14674 #endif
14675 }
14676 
14700 static inline int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec,
14701  const q7_t *wt_mat,
14702  const uint16_t size,
14703  const uint16_t wt_row_num,
14704  const uint16_t pre_rshift,
14705  const uint16_t out_scale,
14706  const uint16_t post_rshift,
14707  q7_t *out_vec,
14708  q15_t *in_tmp_buf)
14709 {
14710 #if defined(__zcc__)
14711  return tpt_nn_fc_u8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14712  out_scale, post_rshift, out_vec, in_tmp_buf);
14713 #else
14714  return riscv_nn_fc_u8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14715  out_scale, post_rshift, out_vec, in_tmp_buf);
14716 #endif
14717 }
14718 
14742 static inline int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec,
14743  const q7_t *wt_mat,
14744  const uint16_t size,
14745  const uint16_t wt_row_num,
14746  const uint16_t pre_rshift,
14747  const uint16_t out_scale,
14748  const uint16_t post_rshift,
14749  q15_t *out_vec,
14750  q15_t *in_tmp_buf)
14751 {
14752 #if defined(__zcc__)
14753  return tpt_nn_fc_u8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14754  out_scale, post_rshift, out_vec, in_tmp_buf);
14755 #else
14756  return riscv_nn_fc_u8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14757  out_scale, post_rshift, out_vec, in_tmp_buf);
14758 #endif
14759 }
14760 
14785 static inline int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec,
14786  const q7_t *wt_mat,
14787  const uint16_t size,
14788  const uint16_t wt_row_num,
14789  const uint16_t pre_rshift,
14790  const uint16_t out_scale,
14791  const uint16_t post_rshift,
14792  const q31_t *bias,
14793  q7_t *out_vec,
14794  q15_t *in_tmp_buf)
14795 {
14796 #if defined(__zcc__)
14797  return tpt_nn_fc_s8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14798  pre_rshift, out_scale, post_rshift,
14799  bias, out_vec, in_tmp_buf);
14800 #else
14801  return riscv_nn_fc_s8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14802  pre_rshift, out_scale, post_rshift,
14803  bias, out_vec, in_tmp_buf);
14804 #endif
14805 }
14806 
14832 static inline int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec,
14833  const q7_t *wt_mat,
14834  const uint16_t size,
14835  const uint16_t wt_row_num,
14836  const uint16_t pre_rshift,
14837  const uint16_t out_scale,
14838  const uint16_t post_rshift,
14839  const q31_t *bias,
14840  q15_t *out_vec,
14841  q15_t *in_tmp_buf)
14842 {
14843 #if defined(__zcc__)
14844  return tpt_nn_fc_s8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14845  pre_rshift, out_scale, post_rshift,
14846  bias, out_vec, in_tmp_buf);
14847 #else
14848  return riscv_nn_fc_s8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14849  pre_rshift, out_scale, post_rshift,
14850  bias, out_vec, in_tmp_buf);
14851 #endif
14852 }
14853 
14878 static inline int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec,
14879  const q7_t *wt_mat,
14880  const uint16_t size,
14881  const uint16_t wt_row_num,
14882  const uint16_t pre_rshift,
14883  const uint16_t out_scale,
14884  const uint16_t post_rshift,
14885  const q31_t *bias,
14886  u8_t *out_vec,
14887  q15_t *in_tmp_buf)
14888 {
14889 #if defined(__zcc__)
14890  return tpt_nn_fc_u8_u8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14891  pre_rshift, out_scale, post_rshift,
14892  bias, out_vec, in_tmp_buf);
14893 #else
14894  return riscv_nn_fc_u8_u8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14895  pre_rshift, out_scale, post_rshift,
14896  bias, out_vec, in_tmp_buf);
14897 #endif
14898 }
14899 
14925 static inline int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec,
14926  const q7_t *wt_mat,
14927  const uint16_t size,
14928  const uint16_t wt_row_num,
14929  const uint16_t pre_rshift,
14930  const uint16_t out_scale,
14931  const uint16_t post_rshift,
14932  const q31_t *bias,
14933  q7_t *out_vec,
14934  q15_t *in_tmp_buf)
14935 {
14936 #if defined(__zcc__)
14937  return tpt_nn_fc_u8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14938  pre_rshift, out_scale, post_rshift,
14939  bias, out_vec, in_tmp_buf);
14940 #else
14941  return riscv_nn_fc_u8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14942  pre_rshift, out_scale, post_rshift,
14943  bias, out_vec, in_tmp_buf);
14944 #endif
14945 }
14946 
14972 static inline int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec,
14973  const q7_t *wt_mat,
14974  const uint16_t size,
14975  const uint16_t wt_row_num,
14976  const uint16_t pre_rshift,
14977  const uint16_t out_scale,
14978  const uint16_t post_rshift,
14979  const q31_t *bias,
14980  q15_t *out_vec,
14981  q15_t *in_tmp_buf)
14982 {
14983 #if defined(__zcc__)
14984  return tpt_nn_fc_u8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14985  pre_rshift, out_scale, post_rshift,
14986  bias, out_vec, in_tmp_buf);
14987 #else
14988  return riscv_nn_fc_u8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14989  pre_rshift, out_scale, post_rshift,
14990  bias, out_vec, in_tmp_buf);
14991 #endif
14992 }
14993 
15017 static inline int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec,
15018  const q7_t *wt_mat,
15019  const uint16_t size,
15020  const uint16_t wt_row_num,
15021  const uint16_t pre_rshift,
15022  const uint16_t out_scale,
15023  const uint16_t post_rshift,
15024  q7_t *out_vec,
15025  q15_t *in_tmp_buf)
15026 {
15027 #if defined(__zcc__)
15028  return tpt_nn_fc_s8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15029  pre_rshift, out_scale, post_rshift,
15030  out_vec, in_tmp_buf);
15031 #else
15032  return riscv_nn_fc_s8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15033  pre_rshift, out_scale, post_rshift,
15034  out_vec, in_tmp_buf);
15035 #endif
15036 }
15037 
15062 static inline int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec,
15063  const q7_t *wt_mat,
15064  const uint16_t size,
15065  const uint16_t wt_row_num,
15066  const uint16_t pre_rshift,
15067  const uint16_t out_scale,
15068  const uint16_t post_rshift,
15069  q15_t *out_vec,
15070  q15_t *in_tmp_buf)
15071 {
15072 #if defined(__zcc__)
15073  return tpt_nn_fc_s8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15074  pre_rshift, out_scale, post_rshift,
15075  out_vec, in_tmp_buf);
15076 #else
15077  return riscv_nn_fc_s8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15078  pre_rshift, out_scale, post_rshift,
15079  out_vec, in_tmp_buf);
15080 #endif
15081 }
15082 
15106 static inline int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec,
15107  const q7_t *wt_mat,
15108  const uint16_t size,
15109  const uint16_t wt_row_num,
15110  const uint16_t pre_rshift,
15111  const uint16_t out_scale,
15112  const uint16_t post_rshift,
15113  u8_t *out_vec,
15114  q15_t *in_tmp_buf)
15115 {
15116 #if defined(__zcc__)
15117  return tpt_nn_fc_u8_u8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15118  pre_rshift, out_scale, post_rshift,
15119  out_vec, in_tmp_buf);
15120 #else
15121  return riscv_nn_fc_u8_u8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15122  pre_rshift, out_scale, post_rshift,
15123  out_vec, in_tmp_buf);
15124 #endif
15125 }
15126 
15151 static inline int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec,
15152  const q7_t *wt_mat,
15153  const uint16_t size,
15154  const uint16_t wt_row_num,
15155  const uint16_t pre_rshift,
15156  const uint16_t out_scale,
15157  const uint16_t post_rshift,
15158  q7_t *out_vec,
15159  q15_t *in_tmp_buf)
15160 {
15161 #if defined(__zcc__)
15162  return tpt_nn_fc_u8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15163  pre_rshift, out_scale, post_rshift,
15164  out_vec, in_tmp_buf);
15165 #else
15166  return riscv_nn_fc_u8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15167  pre_rshift, out_scale, post_rshift,
15168  out_vec, in_tmp_buf);
15169 #endif
15170 }
15171 
15196 static inline int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec,
15197  const q7_t *wt_mat,
15198  const uint16_t size,
15199  const uint16_t wt_row_num,
15200  const uint16_t pre_rshift,
15201  const uint16_t out_scale,
15202  const uint16_t post_rshift,
15203  q15_t *out_vec,
15204  q15_t *in_tmp_buf)
15205 {
15206 #if defined(__zcc__)
15207  return tpt_nn_fc_u8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15208  pre_rshift, out_scale, post_rshift,
15209  out_vec, in_tmp_buf);
15210 #else
15211  return riscv_nn_fc_u8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15212  pre_rshift, out_scale, post_rshift,
15213  out_vec, in_tmp_buf);
15214 #endif
15215 }
15216 
15227 static inline void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat,
15228  const uint32_t size,
15229  const uint32_t wt_row_num,
15230  q7_t *wt_mat_out)
15231 {
15232 #if defined(__zcc__)
15233  tpt_nn_fc_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15234 #else
15235  riscv_nn_fc_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15236 #endif
15237 }
15238 
15249 static inline void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat,
15250  const uint32_t size,
15251  const uint32_t wt_row_num,
15252  q15_t *wt_mat_out)
15253 {
15254 #if defined(__zcc__)
15255  tpt_nn_fc_s16_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15256 #else
15257  riscv_nn_fc_s16_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15258 #endif
15259 }
15260 
15270 static inline void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat,
15271  const uint32_t size,
15272  const uint32_t wt_row_num,
15273  q7_t *wt_mat_out)
15274 {
15275 #if defined(__zcc__)
15276  tpt_nn_fc_mat_vec_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15277 #else
15278  riscv_nn_fc_mat_vec_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15279 #endif
15280 }
15281 
15312 static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec,
15313  const int8_t *wt_mat,
15314  const uint16_t in_vec_col,
15315  const uint16_t wt_mat_row,
15316  const uint16_t in_vec_group,
15317  const int32_t in_offset,
15318  const int32_t wt_offset,
15319  const int32_t out_scale,
15320  const int32_t out_shift,
15321  const int32_t out_offset,
15322  const int32_t *bias,
15323  int8_t *out_vec,
15324  const int32_t act_min,
15325  const int32_t act_max,
15326  q15_t *tmp_buf)
15327 {
15328 #if defined(__zcc__)
15329 
15330  tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15331  out_shift, act_min, act_max};
15332  tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15333 
15334  return tpt_fully_connected_s8(out_vec, in_vec, wt_mat, bias, &aFc_params,
15335  &aFC_dims, tmp_buf);
15336 #else
15337  return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec, wt_mat, in_vec_col, wt_mat_row,
15338  in_vec_group, in_offset, wt_offset,
15339  out_scale, out_shift, out_offset, bias,
15340  out_vec, act_min, act_max, tmp_buf);
15341 #endif
15342 }
15343 
/**
 * @brief Query the backend's buffer-size helper for `fc_s8_s8_s8_asym_bias`.
 *
 * Calls riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size() unless __zcc__ is
 * defined, in which case tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size() is used.
 *
 * @param in_vec_col forwarded unchanged to the backend helper
 * @return the value returned by the selected backend helper
 */
static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(
    const uint16_t in_vec_col)
{
#if !defined(__zcc__)
    return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#else
    return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#endif
}
15359 
15364 #endif
15365 
15366 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15367 #if defined(__zcc__)
15368 #include "tpt_nn_fully_connected.h"
15369 #else
15370 #include "riscv_nn_fully_connected.h"
15371 #endif
15372 
15403 static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec,
15404  const int8_t *wt_mat,
15405  const uint16_t in_vec_col,
15406  const uint16_t wt_mat_row,
15407  const uint16_t in_vec_group,
15408  const int32_t in_offset,
15409  const int32_t wt_offset,
15410  const int32_t out_scale,
15411  const int32_t out_shift,
15412  const int32_t out_offset,
15413  const int32_t *bias,
15414  int8_t *out_vec,
15415  const int32_t act_min,
15416  const int32_t act_max,
15417  q15_t *tmp_buf)
15418 {
15419 #if defined(__zcc__)
15420 
15421  tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15422  out_shift, act_min, act_max};
15423  tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15424 
15425  return tpt_fully_connected_s8(out_vec, in_vec, wt_mat, bias, &aFc_params,
15426  &aFC_dims, tmp_buf);
15427 #else
15428  return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec, wt_mat, in_vec_col, wt_mat_row,
15429  in_vec_group, in_offset, wt_offset,
15430  out_scale, out_shift, out_offset, bias,
15431  out_vec, act_min, act_max, tmp_buf);
15432 #endif
15433 }
15434 
/**
 * @brief Query the backend's buffer-size helper for `fc_s8_s8_s8_asym_bias`
 *        (definition guarded by HPM_EN_MATH_NN_RVP32_LIB).
 *
 * Calls riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size() unless __zcc__ is
 * defined, in which case tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size() is used.
 *
 * @param in_vec_col forwarded unchanged to the backend helper
 * @return the value returned by the selected backend helper
 */
static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(
    const uint16_t in_vec_col)
{
#if !defined(__zcc__)
    return riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#else
    return tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#endif
}
15450 
15451 #endif /* HPM_EN_MATH_NN_RVP32_LIB */
15452 
15453 #endif
15454 
15455 #ifdef HPM_MATH_NN_POOLING
15456 #ifdef HPM_EN_MATH_NN_LIB
15457 #if defined(__zcc__)
15458 #include "tpt_nn_pooling.h"
15459 #else
15460 #include "riscv_nn_pooling.h"
15461 #endif
15462 
15505 static inline void hpm_nn_avepool_HWC_s8(q7_t *in_tensor,
15506  const uint16_t in_tensor_dim,
15507  const uint16_t in_tensor_ch,
15508  const uint16_t ker_dim,
15509  const uint16_t pad,
15510  const uint16_t stride,
15511  const uint16_t out_tensor_dim,
15512  q7_t *in_tmp_buf,
15513  q7_t *out_tensor)
15514 {
15515 #if defined(__zcc__)
15516  tpt_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15517  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15518 #else
15519  riscv_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15520  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15521 #endif
15522 }
15523 
15570 static inline void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor,
15571  const uint16_t in_tensor_dim_x,
15572  const uint16_t in_tensor_dim_y,
15573  const uint16_t in_tensor_ch,
15574  const uint16_t ker_dim_x,
15575  const uint16_t ker_dim_y,
15576  const uint16_t pad_x,
15577  const uint16_t pad_y,
15578  const uint16_t stride_x,
15579  const uint16_t stride_y,
15580  const uint16_t out_tensor_dim_x,
15581  const uint16_t out_tensor_dim_y,
15582  q7_t *in_tmp_buf,
15583  q7_t *out_tensor,
15584  const uint16_t out_lshift)
15585 {
15586 #if defined(__zcc__)
15587  tpt_nn_avepool_HWC_s8_any(
15588  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15589  ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15590  out_tensor_dim_y, in_tmp_buf, out_tensor, out_lshift);
15591 #else
15592  riscv_nn_avepool_HWC_s8_any(
15593  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15594  ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15595  out_tensor_dim_y, in_tmp_buf, out_tensor, out_lshift);
15596 #endif
15597 }
15598 
/**
 * @brief Average-pooling wrapper for int8_t tensors with separate x/y values
 *        and activation bounds.
 *
 * The backend is selected at compile time. When __zcc__ is defined the
 * arguments are packed into a tpt_nn_avgpool_params_act_s8 and a
 * tpt_nn_avgpool_dims_act_s8 and handed to tpt_avgpool_s8_any_act();
 * otherwise they are forwarded, in declaration order, to
 * riscv_nn_avepool_HWC_s8_any_act().
 *
 * @note Parameter descriptions are inferred from the parameter names;
 *       confirm them against the backend library documentation.
 *
 * @param[in]  in_tensor_dim_y   input tensor y dimension
 * @param[in]  in_tensor_dim_x   input tensor x dimension
 * @param[in]  out_tensor_dim_y  output tensor y dimension
 * @param[in]  out_tensor_dim_x  output tensor x dimension
 * @param[in]  stride_y          stride in y
 * @param[in]  stride_x          stride in x
 * @param[in]  ker_dim_y         kernel y dimension
 * @param[in]  ker_dim_x         kernel x dimension
 * @param[in]  pad_y             padding in y
 * @param[in]  pad_x             padding in x
 * @param[in]  act_min           lower activation bound
 * @param[in]  act_max           upper activation bound
 * @param[in]  in_tensor_ch      number of input channels
 * @param[in]  in_tensor         input tensor
 * @param[in]  in_tmp_buf        scratch buffer handed to the backend
 * @param[out] out_tensor        output tensor
 * @return the value returned by the selected backend routine
 */
static inline int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y,
                                                    const int in_tensor_dim_x,
                                                    const int out_tensor_dim_y,
                                                    const int out_tensor_dim_x,
                                                    const int stride_y,
                                                    const int stride_x,
                                                    const int ker_dim_y,
                                                    const int ker_dim_x,
                                                    const int pad_y,
                                                    const int pad_x,
                                                    const int act_min,
                                                    const int act_max,
                                                    const int in_tensor_ch,
                                                    int8_t *in_tensor,
                                                    int16_t *in_tmp_buf,
                                                    int8_t *out_tensor)
{
    int32_t ret;

#ifdef __zcc__
    /* The tpt backend receives pooling settings and dimensions as structs. */
    tpt_nn_avgpool_params_act_s8 pool_params = {
        stride_x, stride_y, pad_x, pad_y, act_min, act_max
    };
    tpt_nn_avgpool_dims_act_s8 pool_dims = {
        in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_dim_x, ker_dim_y,
        out_tensor_dim_x, out_tensor_dim_y
    };

    ret = tpt_avgpool_s8_any_act(out_tensor, in_tensor,
                                 &pool_params, &pool_dims, in_tmp_buf);
#else
    ret = riscv_nn_avepool_HWC_s8_any_act(in_tensor_dim_y, in_tensor_dim_x,
                                          out_tensor_dim_y, out_tensor_dim_x,
                                          stride_y, stride_x,
                                          ker_dim_y, ker_dim_x,
                                          pad_y, pad_x,
                                          act_min, act_max,
                                          in_tensor_ch,
                                          in_tensor, in_tmp_buf, out_tensor);
#endif

    return ret;
}
15660 
/**
 * @brief Query the buffer size associated with
 *        hpm_nn_avepool_HWC_s8_any_act().
 *
 * Forwards to tpt_nn_avepool_HWC_s8_any_act_get_buffer_size() when __zcc__ is
 * defined and to riscv_nn_avepool_HWC_s8_any_act_get_buffer_size() otherwise.
 *
 * @param[in] out_tensor_dim_x  passed unchanged to the backend (presumably
 *                              the output tensor x dimension; confirm)
 * @param[in] in_tensor_ch      passed unchanged to the backend (presumably
 *                              the number of input channels; confirm)
 * @return the value returned by the selected backend routine
 */
static inline int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x, in_tensor_ch);
#else
    buf_size = riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x, in_tensor_ch);
#endif

    return buf_size;
}
15679 
15709  static inline void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor,
15710  const uint16_t in_tensor_dim,
15711  const uint16_t in_tensor_ch,
15712  const uint16_t ker_dim,
15713  const uint16_t pad,
15714  const uint16_t stride,
15715  const uint16_t out_tensor_dim,
15716  q7_t *in_tmp_buf,
15717  q7_t *out_tensor)
15718 {
15719 #if defined(__zcc__)
15720  tpt_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15721  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15722 #else
15723  riscv_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15724  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15725 #endif
15726 }
15727 
/**
 * @brief Max-pooling wrapper for int8_t tensors with separate x/y values and
 *        activation bounds.
 *
 * Forwards every argument, in declaration order, to
 * tpt_nn_maxpool_HWC_s8_any_act() when __zcc__ is defined and to
 * riscv_nn_maxpool_HWC_s8_any_act() otherwise.
 *
 * @note Parameter descriptions are inferred from the parameter names;
 *       confirm them against the backend library documentation.
 *
 * @param[in]  in_tensor_dim_y   input tensor y dimension
 * @param[in]  in_tensor_dim_x   input tensor x dimension
 * @param[in]  out_tensor_dim_y  output tensor y dimension
 * @param[in]  out_tensor_dim_x  output tensor x dimension
 * @param[in]  stride_y          stride in y
 * @param[in]  stride_x          stride in x
 * @param[in]  ker_dim_y         kernel y dimension
 * @param[in]  ker_dim_x         kernel x dimension
 * @param[in]  pad_y             padding in y
 * @param[in]  pad_x             padding in x
 * @param[in]  act_min           lower activation bound
 * @param[in]  act_max           upper activation bound
 * @param[in]  in_tensor_ch      number of input channels
 * @param[in]  in_tensor         input tensor
 * @param[in]  tmp_buffer        scratch buffer handed to the backend
 * @param[out] out_tensor        output tensor
 * @return the value returned by the selected backend routine
 */
static inline int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y,
                                                    const uint16_t in_tensor_dim_x,
                                                    const uint16_t out_tensor_dim_y,
                                                    const uint16_t out_tensor_dim_x,
                                                    const uint16_t stride_y,
                                                    const uint16_t stride_x,
                                                    const uint16_t ker_dim_y,
                                                    const uint16_t ker_dim_x,
                                                    const uint16_t pad_y,
                                                    const uint16_t pad_x,
                                                    const int8_t act_min,
                                                    const int8_t act_max,
                                                    const uint16_t in_tensor_ch,
                                                    int8_t *in_tensor,
                                                    int16_t *tmp_buffer,
                                                    int8_t *out_tensor)
{
    int32_t ret;

#ifdef __zcc__
    ret = tpt_nn_maxpool_HWC_s8_any_act(in_tensor_dim_y, in_tensor_dim_x,
                                        out_tensor_dim_y, out_tensor_dim_x,
                                        stride_y, stride_x,
                                        ker_dim_y, ker_dim_x,
                                        pad_y, pad_x,
                                        act_min, act_max,
                                        in_tensor_ch,
                                        in_tensor, tmp_buffer, out_tensor);
#else
    ret = riscv_nn_maxpool_HWC_s8_any_act(in_tensor_dim_y, in_tensor_dim_x,
                                          out_tensor_dim_y, out_tensor_dim_x,
                                          stride_y, stride_x,
                                          ker_dim_y, ker_dim_x,
                                          pad_y, pad_x,
                                          act_min, act_max,
                                          in_tensor_ch,
                                          in_tensor, tmp_buffer, out_tensor);
#endif

    return ret;
}
15783 
15788 #endif
15789 
15790 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15791 #if defined(__zcc__)
15792 #include "tpt_nn_pooling.h"
15793 #else
15794 #include "riscv_nn_pooling.h"
15795 #endif
15796 
/**
 * @brief Average-pooling wrapper for int8_t tensors with separate x/y values
 *        and activation bounds (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * The backend is selected at compile time. When __zcc__ is defined the
 * arguments are packed into a tpt_nn_avgpool_params_act_s8 and a
 * tpt_nn_avgpool_dims_act_s8 and handed to tpt_avgpool_s8_any_act();
 * otherwise they are forwarded, in declaration order, to
 * riscv_nn_avepool_HWC_s8_any_act().
 *
 * @note Parameter descriptions are inferred from the parameter names;
 *       confirm them against the backend library documentation.
 *
 * @param[in]  in_tensor_dim_y   input tensor y dimension
 * @param[in]  in_tensor_dim_x   input tensor x dimension
 * @param[in]  out_tensor_dim_y  output tensor y dimension
 * @param[in]  out_tensor_dim_x  output tensor x dimension
 * @param[in]  stride_y          stride in y
 * @param[in]  stride_x          stride in x
 * @param[in]  ker_dim_y         kernel y dimension
 * @param[in]  ker_dim_x         kernel x dimension
 * @param[in]  pad_y             padding in y
 * @param[in]  pad_x             padding in x
 * @param[in]  act_min           lower activation bound
 * @param[in]  act_max           upper activation bound
 * @param[in]  in_tensor_ch      number of input channels
 * @param[in]  in_tensor         input tensor
 * @param[in]  in_tmp_buf        scratch buffer handed to the backend
 * @param[out] out_tensor        output tensor
 * @return the value returned by the selected backend routine
 */
static inline int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y,
                                                    const int in_tensor_dim_x,
                                                    const int out_tensor_dim_y,
                                                    const int out_tensor_dim_x,
                                                    const int stride_y,
                                                    const int stride_x,
                                                    const int ker_dim_y,
                                                    const int ker_dim_x,
                                                    const int pad_y,
                                                    const int pad_x,
                                                    const int act_min,
                                                    const int act_max,
                                                    const int in_tensor_ch,
                                                    int8_t *in_tensor,
                                                    int16_t *in_tmp_buf,
                                                    int8_t *out_tensor)
{
    int32_t ret;

#ifdef __zcc__
    /* The tpt backend receives pooling settings and dimensions as structs. */
    tpt_nn_avgpool_params_act_s8 pool_params = {
        stride_x, stride_y, pad_x, pad_y, act_min, act_max
    };
    tpt_nn_avgpool_dims_act_s8 pool_dims = {
        in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_dim_x, ker_dim_y,
        out_tensor_dim_x, out_tensor_dim_y
    };

    ret = tpt_avgpool_s8_any_act(out_tensor, in_tensor,
                                 &pool_params, &pool_dims, in_tmp_buf);
#else
    ret = riscv_nn_avepool_HWC_s8_any_act(in_tensor_dim_y, in_tensor_dim_x,
                                          out_tensor_dim_y, out_tensor_dim_x,
                                          stride_y, stride_x,
                                          ker_dim_y, ker_dim_x,
                                          pad_y, pad_x,
                                          act_min, act_max,
                                          in_tensor_ch,
                                          in_tensor, in_tmp_buf, out_tensor);
#endif

    return ret;
}
15858 
/**
 * @brief Query the buffer size associated with
 *        hpm_nn_avepool_HWC_s8_any_act() (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * Forwards to tpt_nn_avepool_HWC_s8_any_act_get_buffer_size() when __zcc__ is
 * defined and to riscv_nn_avepool_HWC_s8_any_act_get_buffer_size() otherwise.
 *
 * @param[in] out_tensor_dim_x  passed unchanged to the backend (presumably
 *                              the output tensor x dimension; confirm)
 * @param[in] in_tensor_ch      passed unchanged to the backend (presumably
 *                              the number of input channels; confirm)
 * @return the value returned by the selected backend routine
 */
static inline int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x, in_tensor_ch);
#else
    buf_size = riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x, in_tensor_ch);
#endif

    return buf_size;
}
15877 
15878 #endif
15879 #endif
15880 
15881 #ifdef HPM_MATH_NN_SOFTMAX
15882 #ifdef HPM_EN_MATH_NN_LIB
15883 #if defined(__zcc__)
15884 #include "tpt_nn_softmax.h"
15885 #else
15886 #include "riscv_nn_softmax.h"
15887 #endif
15888 
15913 static inline void hpm_nn_softmax_s8_fast(const q7_t *in_vec,
15914  const uint16_t size,
15915  q7_t *out_vec)
15916 {
15917 #if defined(__zcc__)
15918  tpt_nn_softmax_s8_fast(in_vec, size, out_vec);
15919 #else
15920  riscv_nn_softmax_s8_fast(in_vec, size, out_vec);
15921 #endif
15922 }
15923 
15931 static inline void hpm_nn_softmax_s16_fast(const q15_t *in_vec,
15932  const uint16_t size,
15933  q15_t *out_vec)
15934 {
15935 #if defined(__zcc__)
15936  tpt_nn_softmax_s16_fast(in_vec, size, out_vec);
15937 #else
15938  riscv_nn_softmax_s16_fast(in_vec, size, out_vec);
15939 #endif
15940 }
15941 
/**
 * @brief Softmax wrapper for int8_t tensors ("hp" backend variant).
 *
 * The backend is selected at compile time: tpt_softmax_s8_hp() when __zcc__
 * is defined, riscv_nn_softmax_s8_hp() otherwise. The two backends take the
 * output pointer at different positions (first for tpt, last for riscv_nn).
 *
 * @note Parameter descriptions are inferred from the parameter names;
 *       confirm them against the backend library documentation.
 *
 * @param[in]  in_tensor      input tensor
 * @param[in]  in_tensor_row  number of input rows
 * @param[in]  in_tensor_col  number of input columns
 * @param[in]  scale          scaling value
 * @param[in]  lshift         left-shift amount
 * @param[in]  diff_min       minimum difference threshold
 * @param[out] out_tensor     output tensor
 */
static inline void hpm_nn_softmax_s8_hp(const int8_t *in_tensor,
                                        const int32_t in_tensor_row,
                                        const int32_t in_tensor_col,
                                        const int32_t scale,
                                        const int32_t lshift,
                                        const int32_t diff_min,
                                        int8_t *out_tensor)
{
#ifdef __zcc__
    /* tpt backend: destination comes first. */
    tpt_softmax_s8_hp(out_tensor, in_tensor,
                      in_tensor_row, in_tensor_col,
                      scale, lshift, diff_min);
#else
    riscv_nn_softmax_s8_hp(in_tensor,
                           in_tensor_row, in_tensor_col,
                           scale, lshift, diff_min,
                           out_tensor);
#endif
}
15972 
/**
 * @brief Softmax wrapper for uint8_t tensors ("hp" backend variant).
 *
 * Forwards every argument, in declaration order, to tpt_nn_softmax_u8_hp()
 * when __zcc__ is defined and to riscv_nn_softmax_u8_hp() otherwise.
 *
 * @note Parameter descriptions are inferred from the parameter names;
 *       confirm them against the backend library documentation.
 *
 * @param[in]  in_tensor      input tensor
 * @param[in]  in_tensor_row  number of input rows
 * @param[in]  in_tensor_col  number of input columns
 * @param[in]  scale          scaling value
 * @param[in]  lshift         left-shift amount
 * @param[in]  diff_min       minimum difference threshold
 * @param[out] out_tensor     output tensor
 */
static inline void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor,
                                        const int32_t in_tensor_row,
                                        const int32_t in_tensor_col,
                                        const int32_t scale,
                                        const int32_t lshift,
                                        const int32_t diff_min,
                                        uint8_t *out_tensor)
{
#ifdef __zcc__
    tpt_nn_softmax_u8_hp(in_tensor,
                         in_tensor_row, in_tensor_col,
                         scale, lshift, diff_min,
                         out_tensor);
#else
    riscv_nn_softmax_u8_hp(in_tensor,
                           in_tensor_row, in_tensor_col,
                           scale, lshift, diff_min,
                           out_tensor);
#endif
}
16003 
16008 #endif
16009 
16010 #ifdef HPM_EN_MATH_NN_RVP32_LIB
16011 #if defined(__zcc__)
16012 #include "tpt_nn_softmax.h"
16013 #else
16014 #include "riscv_nn_softmax.h"
16015 #endif
16016 
/**
 * @brief Softmax wrapper for int8_t tensors ("hp" backend variant,
 *        HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * The backend is selected at compile time: tpt_softmax_s8_hp() when __zcc__
 * is defined, riscv_nn_softmax_s8_hp() otherwise. The two backends take the
 * output pointer at different positions (first for tpt, last for riscv_nn).
 *
 * @note Parameter descriptions are inferred from the parameter names;
 *       confirm them against the backend library documentation.
 *
 * @param[in]  in_tensor      input tensor
 * @param[in]  in_tensor_row  number of input rows
 * @param[in]  in_tensor_col  number of input columns
 * @param[in]  scale          scaling value
 * @param[in]  lshift         left-shift amount
 * @param[in]  diff_min       minimum difference threshold
 * @param[out] out_tensor     output tensor
 */
static inline void hpm_nn_softmax_s8_hp(const int8_t *in_tensor,
                                        const int32_t in_tensor_row,
                                        const int32_t in_tensor_col,
                                        const int32_t scale,
                                        const int32_t lshift,
                                        const int32_t diff_min,
                                        int8_t *out_tensor)
{
#ifdef __zcc__
    /* tpt backend: destination comes first. */
    tpt_softmax_s8_hp(out_tensor, in_tensor,
                      in_tensor_row, in_tensor_col,
                      scale, lshift, diff_min);
#else
    riscv_nn_softmax_s8_hp(in_tensor,
                           in_tensor_row, in_tensor_col,
                           scale, lshift, diff_min,
                           out_tensor);
#endif
}
16047 #endif
16048 
16049 #endif
16050 
16051 #ifdef HPM_MATH_NN_UTIL
16052 #ifdef HPM_EN_MATH_NN_LIB
16053 #if defined(__zcc__)
16054 #include "tpt_nn_util.h"
16055 #else
16056 #include "riscv_nn_util.h"
16057 #endif
16058 
16067 #ifdef __riscv_zfh
16076 static inline int32_t hpm_nn_exp_f16(const float16_t *in_vec,
16077  const uint32_t size,
16078  float16_t *out_vec)
16079 {
16080 #if defined(__zcc__)
16081  return tpt_nn_exp_f16(in_vec, size, out_vec);
16082 #else
16083  return riscv_nn_exp_f16(in_vec, size, out_vec);
16084 #endif
16085 }
16086 #endif
16087 
/**
 * @brief Wrapper around the backend "reshape_s8" routine for int8_t tensors.
 *
 * The backend is selected at compile time: tpt_reshape_s8() when __zcc__ is
 * defined, riscv_nn_reshape_s8() otherwise. The two backends take the input
 * and output pointers in opposite order.
 *
 * @param[in]  in_tensor   input tensor
 * @param[out] out_tensor  output tensor
 * @param[in]  size        passed unchanged to the backend (presumably the
 *                         tensor size; confirm with the backend docs)
 */
static inline void hpm_nn_reshape_s8(const int8_t *in_tensor,
                                     int8_t *out_tensor,
                                     const uint32_t size)
{
#ifdef __zcc__
    /* tpt backend: destination comes first. */
    tpt_reshape_s8(out_tensor, in_tensor, size);
#else
    riscv_nn_reshape_s8(in_tensor, out_tensor, size);
#endif
}
16114 
16134 static inline int32_t hpm_nn_top_k_s8(q7_t *in_vec,
16135  uint32_t size,
16136  uint32_t k,
16137  q7_t *val,
16138  uint32_t *idx)
16139 {
16140 #if defined(__zcc__)
16141  return tpt_nn_top_k_s8(in_vec, size, k, val, idx);
16142 #else
16143  return riscv_nn_top_k_s8(in_vec, size, k, val, idx);
16144 #endif
16145 }
16146 
16147 #ifdef __riscv_zfh
16167 static inline int32_t hpm_nn_top_k_f16(float16_t *in_vec,
16168  uint32_t size,
16169  uint32_t k,
16170  float16_t *val,
16171  uint32_t *idx)
16172 {
16173 #if defined(__zcc__)
16174  return tpt_nn_top_k_f16(in_vec, size, k, val, idx);
16175 #else
16176  return riscv_nn_top_k_f16(in_vec, size, k, val, idx);
16177 #endif
16178 }
16179 #endif
16180 
16185 #endif
16186 
16187 #ifdef HPM_EN_MATH_NN_RVP32_LIB
16188 #if defined(__zcc__)
16189 #include "tpt_nn_util.h"
16190 #else
16191 #include "riscv_nn_util.h"
16192 #endif
16193 
/**
 * @brief Wrapper around the backend "reshape_s8" routine for int8_t tensors
 *        (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * The backend is selected at compile time: tpt_reshape_s8() when __zcc__ is
 * defined, riscv_nn_reshape_s8() otherwise. The two backends take the input
 * and output pointers in opposite order.
 *
 * @param[in]  in_tensor   input tensor
 * @param[out] out_tensor  output tensor
 * @param[in]  size        passed unchanged to the backend (presumably the
 *                         tensor size; confirm with the backend docs)
 */
static inline void hpm_nn_reshape_s8(const int8_t *in_tensor,
                                     int8_t *out_tensor,
                                     const uint32_t size)
{
#ifdef __zcc__
    /* tpt backend: destination comes first. */
    tpt_reshape_s8(out_tensor, in_tensor, size);
#else
    riscv_nn_reshape_s8(in_tensor, out_tensor, size);
#endif
}
16220 
16221 #endif
16222 
16227 #endif
16228 
16229 #ifdef __cplusplus
16230 }
16231 #endif
16232 #endif
#define HPM_FFA
Definition: hpm_soc.h:392
static void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise AND of two u32 vectors.
Definition: hpm_math.h:1998
static void hpm_dsp_and_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise AND of two u16 vectors.
Definition: hpm_math.h:2017
static void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise AND of two u8 vectors.
Definition: hpm_math.h:2036
static void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
Elementwise clipping of q31 function.
Definition: hpm_math.h:1927
static void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
Elementwise clipping of f32 function.
Definition: hpm_math.h:1908
static void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
Elementwise clipping of q7 function.
Definition: hpm_math.h:1965
static void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
Elementwise clipping of q15 function.
Definition: hpm_math.h:1946
static void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
Compute the logical bitwise NOT of u16 vector.
Definition: hpm_math.h:2223
static void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
Compute the logical bitwise NOT of u32 vector.
Definition: hpm_math.h:2206
static void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
Compute the logical bitwise NOT of u8 vector.
Definition: hpm_math.h:2240
static void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise OR of two u8 vectors.
Definition: hpm_math.h:2105
static void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise OR of two u16 vectors.
Definition: hpm_math.h:2087
static void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise OR of two u32 vectors.
Definition: hpm_math.h:2069
static void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u32 vectors.
Definition: hpm_math.h:2138
static void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u16 vectors.
Definition: hpm_math.h:2156
static void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u8 vectors.
Definition: hpm_math.h:2174
static void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Addition of U8 vectors.
Definition: hpm_math.h:1119
static void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
Subtraction of u8 vectors.
Definition: hpm_math.h:1218
static q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
Division of q31 inputs.
Definition: hpm_math.h:1345
static void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Multiplication of q15 vectors.
Definition: hpm_math.h:1273
static uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
Dot production of U8 vectors.
Definition: hpm_math.h:1606
static void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
To multiply a q31 vectors by a q31 scale.
Definition: hpm_math.h:1739
static void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Addition of q15 vectors.
Definition: hpm_math.h:1079
static void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
The offset of q7 vectors.
Definition: hpm_math.h:1681
static void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Subtraction of q15 vectors.
Definition: hpm_math.h:1178
static q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
Division of q63 inputs divided by a positive 32 bits.
Definition: hpm_math.h:1362
static void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Multiplication of q31 vectors.
Definition: hpm_math.h:1253
static q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
Division of positive 64-bits inputs divided by a positive 32-bits.
Definition: hpm_math.h:1379
static void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
Absolute value of q7 vectors.
Definition: hpm_math.h:1020
static void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
Negation of q15 vectors.
Definition: hpm_math.h:1437
static q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
Dot production of q31 vectors.
Definition: hpm_math.h:1501
static void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
The offset of floating-point vectors.
Definition: hpm_math.h:1621
static void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
To multiply a q7 vectors by a q7 scale.
Definition: hpm_math.h:1785
static void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Addition of q7 vectors.
Definition: hpm_math.h:1099
static void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
The offset of U8 vectors.
Definition: hpm_math.h:1701
static void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Subtraction of q7 vectors.
Definition: hpm_math.h:1198
static void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
Shifts a q31 vector with a specified shift number.
Definition: hpm_math.h:1846
static void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
To multiply a q15 vectors by a q15 scale.
Definition: hpm_math.h:1762
static void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Addition of q31 vectors.
Definition: hpm_math.h:1059
static void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
The offset of q15 vectors.
Definition: hpm_math.h:1661
static void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
To multiply a floating-point vectors by a floating-point scale.
Definition: hpm_math.h:1716
static void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
The offset of q31 vectors.
Definition: hpm_math.h:1641
static void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Addition of floating-point vectors.
Definition: hpm_math.h:1039
static void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Subtraction of floating-point vectors.
Definition: hpm_math.h:1138
static void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
Negation of floating-point vectors.
Definition: hpm_math.h:1397
static void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
Negation of q31 vectors.
Definition: hpm_math.h:1417
static void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
Negation of q7 vectors.
Definition: hpm_math.h:1457
static void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
Shifts a q7 vector with a specified shift number.
Definition: hpm_math.h:1867
static q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
Dot production of q7 vectors.
Definition: hpm_math.h:1566
static q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
Dot production of q15 vectors.
Definition: hpm_math.h:1524
static q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
Dot production of q7 * q15 vectors.
Definition: hpm_math.h:1589
static float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
Dot production of floating-point vectors.
Definition: hpm_math.h:1476
static void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
To multiply a u8 vectors by a q7 scale.
Definition: hpm_math.h:1807
static void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Subtraction of q31 vectors.
Definition: hpm_math.h:1158
static q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
Dot production of u8 * q15 vectors.
Definition: hpm_math.h:1548
static void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
Absolute value of q31 vectors.
Definition: hpm_math.h:979
static void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Multiplication of u8 vectors.
Definition: hpm_math.h:1313
static void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
Shifts a q15 vector with a specified shift number.
Definition: hpm_math.h:1825
static void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Division of floating-point vectors.
Definition: hpm_math.h:1328
static void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
Absolute value of floating-point vectors.
Definition: hpm_math.h:959
static void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Multiplication of floating-point vectors.
Definition: hpm_math.h:1233
static void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
Absolute value of q15 vectors.
Definition: hpm_math.h:1000
static void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
Shifts a u8 vector for a specified shift number.
Definition: hpm_math.h:1888
static void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Multiplication of q7 vectors.
Definition: hpm_math.h:1293
static void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
Multiply two floating-point complex vectors.
Definition: hpm_math.h:2603
static void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
Conjugate the q15 complex vector.
Definition: hpm_math.h:2338
static void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
Multiply the floating-point complex vector by a real vector.
Definition: hpm_math.h:2664
static void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
Compute the dot product of the q31 complex vector.
Definition: hpm_math.h:2448
static void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
Conjugate the floating-point complex vector.
Definition: hpm_math.h:2318
static void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
Multiply two q15 complex vector.
Definition: hpm_math.h:2624
static void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude of the floating-point complex vector.
Definition: hpm_math.h:2485
static void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude of the q15 complex vector.
Definition: hpm_math.h:2505
static void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude squared of the floating-point complex vector.
Definition: hpm_math.h:2544
static void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude of the q31 complex vector.
Definition: hpm_math.h:2525
static void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
Multiply the q31 complex vector by a real vector.
Definition: hpm_math.h:2704
static void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
Conjugate the q31 complex vector.
Definition: hpm_math.h:2358
static void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
Compute the dot product of the floating-point complex vector.
Definition: hpm_math.h:2377
static void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
Compute the dot product of the q15 complex vector.
Definition: hpm_math.h:2413
static void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
Compute the dot product type2 of the q31 complex vector.
Definition: hpm_math.h:2467
static void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude squared of the q15 complex vector.
Definition: hpm_math.h:2564
static void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
Multiply two q31 complex vector.
Definition: hpm_math.h:2645
static void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude squared of the q31 complex vector.
Definition: hpm_math.h:2584
static void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
Compute the dot product type2 of the floating-point complex vector.
Definition: hpm_math.h:2392
static void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
Compute the dot product type2 of the q15 complex vector.
Definition: hpm_math.h:2431
static void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
Multiply the q15 complex vector by a real vector.
Definition: hpm_math.h:2684
static q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
Definition: hpm_math.h:2930
static void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
PID initialization control function of Q15 formats.
Definition: hpm_math.h:2948
static void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
Inverse Park transform of q31 input.
Definition: hpm_math.h:2859
static void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
Park transform of q31 input.
Definition: hpm_math.h:2824
static void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
PID initialization control function of floating-point formats.
Definition: hpm_math.h:2890
static void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
Park transform of floating-point input.
Definition: hpm_math.h:2805
static void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
Inverse Clarke transform of q31 input.
Definition: hpm_math.h:2788
static void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
PID initialization control function of Q31 formats.
Definition: hpm_math.h:2923
static void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
Inverse Clarke transform of floating-point input.
Definition: hpm_math.h:2772
static q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
PID control of Q31 input.
Definition: hpm_math.h:2904
static void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
Clarke transform of floating-point input.
Definition: hpm_math.h:2741
static void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
Inverse Park transform of floating-point input.
Definition: hpm_math.h:2841
static float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
PID control of floating-point input.
Definition: hpm_math.h:2872
static void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
Clarke transform of q31 input.
Definition: hpm_math.h:2757
static float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Euclidean distance between two vectors.
Definition: hpm_math.h:3091
static float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cityblock (Manhattan) distance between two vectors.
Definition: hpm_math.h:3037
static float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Sneath distance between two vectors.
Definition: hpm_math.h:3236
static float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Bray-Curtis distance between two vectors.
Definition: hpm_math.h:2983
static float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Kulsinski distance between two vectors.
Definition: hpm_math.h:3200
static float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Jaccard distance between two vectors.
Definition: hpm_math.h:3182
static float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Canberra distance between two vectors.
Definition: hpm_math.h:3001
static float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Hamming distance between two vectors.
Definition: hpm_math.h:3164
static float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cosine distance between two vectors.
Definition: hpm_math.h:3073
static float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Correlation distance between two vectors.
Definition: hpm_math.h:3055
static float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Chebyshev distance between two vectors.
Definition: hpm_math.h:3019
static float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Rogers-Tanimoto distance between two vectors.
Definition: hpm_math.h:3254
static float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Yule distance between two vectors.
Definition: hpm_math.h:3272
static float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Russell-Rao distance between two vectors.
Definition: hpm_math.h:3290
static float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Dice distance between two vectors.
Definition: hpm_math.h:3146
static float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Michener distance between two vectors.
Definition: hpm_math.h:3218
static float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
Minkowski distance between two vectors.
Definition: hpm_math.h:3128
static float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Jensen-Shannon distance between two vectors.
Definition: hpm_math.h:3109
#define FFA_DATA_TYPE_COMPLEX_Q31
Definition: hpm_ffa_drv.h:39
hpm_stat_t ffa_calculate_fft_blocking(FFA_Type *ptr, fft_xfer_t *fft_xfer)
Perform FFT transformation in blocking mode.
Definition: hpm_ffa_drv.c:118
#define FFA_DATA_TYPE_COMPLEX_Q15
Definition: hpm_ffa_drv.h:40
static void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Correlation of the q31 vectors.
Definition: hpm_math.h:3929
static void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3995
static void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point lattice FIR filter.
Definition: hpm_math.h:3445
static void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3539
static void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Correlation of the q7 vectors.
Definition: hpm_math.h:3955
static void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3497
static void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Definition: hpm_math.h:3649
static void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4038
static void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3965
static void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
Definition: hpm_math.h:3533
static void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Convolution of the floating-point vectors.
Definition: hpm_math.h:3667
static void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3551
static void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3411
static void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4007
static void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3977
static void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3509
static void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4032
static void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 LMS filter.
Definition: hpm_math.h:3595
static void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point FIR filter.
Definition: hpm_math.h:3330
static void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Convolution of the q31 vectors.
Definition: hpm_math.h:3721
static void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4001
static void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the floating-point standard LMS filter.
Definition: hpm_math.h:3572
static void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Correlation of the floating-point vectors.
Definition: hpm_math.h:3873
static void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3392
static void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 lattice FIR filter.
Definition: hpm_math.h:3460
static int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q15 vectors.
Definition: hpm_math.h:3798
static void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3485
static void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the f32 normalized LMS filter.
Definition: hpm_math.h:3629
static void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3521
static void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Convolution of the q15 vectors.
Definition: hpm_math.h:3693
static void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3527
static void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3491
static void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3503
static void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
Definition: hpm_math.h:3545
static void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3351
static void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 lattice FIR filter.
Definition: hpm_math.h:3479
static void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3515
static int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q31 vectors.
Definition: hpm_math.h:3824
static void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4026
static int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the floating-point vectors.
Definition: hpm_math.h:3772
static void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3372
static void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4020
static void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Convolution of the q7 vectors.
Definition: hpm_math.h:3747
static void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3989
static void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3971
static void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Correlation of the q15 vectors.
Definition: hpm_math.h:3899
static void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4044
static int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
Convolution Partial of the q7 vectors.
Definition: hpm_math.h:3850
static void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3983
static void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Function for the q15 LMS filter.
Definition: hpm_math.h:3618
static void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4013
static void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 normalized LMS filter.
Definition: hpm_math.h:3641
static void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
Function for the q7 FIR filter.
Definition: hpm_math.h:3430
static void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Subtraction of two floating-point matrices.
Definition: hpm_math.h:4539
static void hpm_dsp_mat_oprod_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size1, uint32_t size2)
Outer product of two q31 matrices.
Definition: hpm_math.h:4723
static void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
Transpose the q15 matrix.
Definition: hpm_math.h:4634
static void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for f32 formats.
Definition: hpm_math.h:4757
static void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point matrices.
Definition: hpm_math.h:4209
static void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point complex matrices.
Definition: hpm_math.h:4240
static int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
Compute the inverse matrix of the floating-point matrix.
Definition: hpm_math.h:4178
static void hpm_dsp_mat_mul_vxm_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t col, uint32_t col2)
Multiplication of q7 vector by matrix.
Definition: hpm_math.h:4417
static void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Addition of two q31 matrices.
Definition: hpm_math.h:4159
static void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4220
static void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst, uint32_t row, uint32_t col)
Transpose the u8 matrix.
Definition: hpm_math.h:4670
static void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
Multiply a q15 matrix by a scale value.
Definition: hpm_math.h:4472
static void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4341
static void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q15 formats.
Definition: hpm_math.h:4777
static void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 matrices.
Definition: hpm_math.h:4331
static void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
Transpose the double-precision floating-point matrices.
Definition: hpm_math.h:4602
static void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 complex matrices.
Definition: hpm_math.h:4304
static void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 complex matrices.
Definition: hpm_math.h:4368
static void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
Transpose the floating-point matrix.
Definition: hpm_math.h:4620
static void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q31 formats.
Definition: hpm_math.h:4797
static void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4277
static void hpm_dsp_mat_add_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
Addition of two double-precision floating-point matrices.
Definition: hpm_math.h:4117
static void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
Transpose the q31 matrix.
Definition: hpm_math.h:4652
static void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
Subtraction of two double-precision floating-point matrices.
Definition: hpm_math.h:4519
static void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Addition of two floating-point matrices.
Definition: hpm_math.h:4098
static void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Addition of two q15 matrices.
Definition: hpm_math.h:4138
static void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
Multiply a floating-point matrix by a scale value.
Definition: hpm_math.h:4447
static void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q7 formats.
Definition: hpm_math.h:4817
static void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q7 matrices.
Definition: hpm_math.h:4395
static int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4431
static void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 matrices.
Definition: hpm_math.h:4267
static void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
Multiply a q31 matrix by a scale value.
Definition: hpm_math.h:4497
static void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
Transpose the q7 matrices.
Definition: hpm_math.h:4685
static void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Subtraction of two q31 matrices.
Definition: hpm_math.h:4581
static int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4188
static void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Subtraction of two q15 matrices.
Definition: hpm_math.h:4560
static void hpm_nn_activate_s16(q15_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses sigmoid or tanh function to perform activation for signed 16-bit integer input vec...
Definition: hpm_math.h:6903
static void size
Definition: hpm_math.h:6938
static void hpm_nn_leaky_relu_s8(q7_t *in_out, uint32_t size, q15_t slope)
This function uses the leaky ReLU function to perform activation for signed 8-bit integer input vecto...
static void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 16-bit integer input vectors.
Definition: hpm_math.h:6989
static void hpm_nn_activate_s8(q7_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses the sigmoid or tanh function to perform activation for signed 8-bit integer input ...
Definition: hpm_math.h:6876
static void slope
Definition: hpm_math.h:6938
static void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6949
static void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6974
static void hpm_nn_add_s8_sym(const q7_t *in_tensor1, const q7_t *in_tensor2, const int16_t *scale1, const int16_t *scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7097
static int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_scale1, const int32_t in_rshift1, const int32_t in_offset2, const int32_t in_scale2, const int32_t in_rshift2, const int32_t lshift, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_rshift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise addition for signed 8-bit integer input vectors.
Definition: hpm_math.h:7205
static int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_offset2, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_shift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise multiplication for signed 8-bit integer input vectors.
Definition: hpm_math.h:7274
static void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1, const q7_t *in_tensor2, const uint32_t scale1, const uint32_t scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7135
static void hpm_nn_concate_s8_z(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_z, const uint32_t out_offset_z)
This function concatenates the int8_t/uint8_t input tensor along the z-axis with the output tensor.
Definition: hpm_math.h:7534
static void hpm_nn_concate_s8_x(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_x, const uint32_t out_offset_x)
This function concatenates the int8_t/uint8_t input tensor along the x-axis with the output tensor.
Definition: hpm_math.h:7464
static void hpm_nn_concate_s8_y(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_y, const uint32_t out_offset_y)
This function concatenates the int8_t/uint8_t input tensor along the y-axis with the output tensor.
Definition: hpm_math.h:7499
static void hpm_nn_concate_s8_w(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint32_t out_offset_w)
This function concatenates the int8_t/uint8_t input tensor along the w-axis with the output tensor.
Definition: hpm_math.h:7429
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9669
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12104
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:12032
static int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(const uint8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint8_t *ker_weight, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t in_offset, const int32_t ker_offset, const int32_t out_offset, uint8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t act_min, const int32_t act_max, const int32_t out_shift, const int32_t out_scale)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:13291
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with bias inputs and ...
Definition: hpm_math.h:10079
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with symmetric...
Definition: hpm_math.h:11781
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:9103
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:9342
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10315
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution with shift-based quantization on th...
Definition: hpm_math.h:8522
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11895
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:9262
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10956
static int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12837
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:8941
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and ...
Definition: hpm_math.h:12685
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12250
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:8694
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution with shift-based quantization on the ou...
Definition: hpm_math.h:8340
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:9422
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-...
Definition: hpm_math.h:9963
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:10021
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11550
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11432
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:9730
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:9183
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:11959
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution with shift-based quantization on the outputs.
Definition: hpm_math.h:7899
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with symmetric quan...
Definition: hpm_math.h:10487
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ch_mult, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y ...
Definition: hpm_math.h:13086
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution for RGB images with shift-based quantization ...
Definition: hpm_math.h:7741
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9905
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10544
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:12392
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10601
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution in any x and y dimensions with shift-bas...
Definition: hpm_math.h:8172
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9847
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:11307
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10256
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11838
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with bias inpu...
Definition: hpm_math.h:11491
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12948
static int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(const int8_t *in_tensor, const int32_t in_tensor_dim_x, const int32_t in_tensor_dim_y, const int32_t in_tensor_ch, const int8_t *ker_weight, const int32_t out_tensor_ch, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_tensor_dim_x, const int32_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const int32_t dilation_x, const int32_t dilation_y, int16_t *tmp_buf)
This function performs depthwise 3x3 kernel convolution for signed 8-bit integer inputs/outputs in a...
Definition: hpm_math.h:13000
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with bias inputs...
Definition: hpm_math.h:11373
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:10810
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10138
static void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution in any x and y dimensions with shift-based qu...
Definition: hpm_math.h:7989
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:10665
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12737
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with bias inputs an...
Definition: hpm_math.h:10197
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:8778
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12462
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12177
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution in any x and y dimensions with shift-ba...
Definition: hpm_math.h:8435
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10430
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12533
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with sy...
Definition: hpm_math.h:9789
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:9023
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast signed 8-bit integer convolution for RGB images with shift-based quantiza...
Definition: hpm_math.h:7820
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11724
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution in any x and y dimensions with shif...
Definition: hpm_math.h:8613
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:11167
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:11237
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:13238
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *in_tmp_buf)
This function performs fast depthwise convolution for signed 8-bit integer inputs/outputs in any x an...
Definition: hpm_math.h:13181
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:8859
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution with shift-based quantization on the out...
Definition: hpm_math.h:8077
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9549
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with symmetric q...
Definition: hpm_math.h:11667
static int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t pad_x, const uint16_t stride_x, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, q15_t *in_tmp_buf)
This function performs 1xn kernel convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12786
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11609
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 16-bit integer convolution with shift-based quantization on the outputs...
Definition: hpm_math.h:8259
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10883
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:12321
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9609
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12604
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:11097
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9488
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:11027
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:7654
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with symmetric quanti...
Definition: hpm_math.h:10373
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10738
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs convolution for signed 8-bit integer inputs/outputs in any x and y dimensions ...
Definition: hpm_math.h:12891
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with shift-based quantizati...
Definition: hpm_math.h:14179
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14529
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14925
static int32_t out_vec
Definition: hpm_math.h:14118
static int32_t in_tmp_buf
Definition: hpm_math.h:14119
static int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14616
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14391
static int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:15196
static int32_t bias
Definition: hpm_math.h:14118
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14300
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14258
static void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast.
Definition: hpm_math.h:15270
static int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:15062
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec, const int8_t *wt_mat, const uint16_t in_vec_col, const uint16_t wt_mat_row, const uint16_t in_vec_group, const int32_t in_offset, const int32_t wt_offset, const int32_t out_scale, const int32_t out_shift, const int32_t out_offset, const int32_t *bias, int8_t *out_vec, const int32_t act_min, const int32_t act_max, q15_t *tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with bias inputs and asymmet...
Definition: hpm_math.h:15312
static int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with symmetric quant...
Definition: hpm_math.h:14574
static void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 8-bit weight data and name...
Definition: hpm_math.h:15227
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14972
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(const uint16_t in_vec_col)
This function is used to get the needed size, in bytes, by the temporary buffer of riscv_nn_fc_s8_s8_...
Definition: hpm_math.h:15351
static int32_t out_rshift
Definition: hpm_math.h:14118
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14878
static int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with interleaved mul...
Definition: hpm_math.h:15017
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with interleaved multiplicat...
Definition: hpm_math.h:14144
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14483
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with shift-based quantizatio...
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14785
static int32_t wt_row_num
Definition: hpm_math.h:14117
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs and...
Definition: hpm_math.h:14345
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs a...
Definition: hpm_math.h:14437
static int32_t bias_lshift
Definition: hpm_math.h:14118
static void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q15_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 16-bit weight data and nam...
Definition: hpm_math.h:15249
static int32_t wt_mat
Definition: hpm_math.h:14117
static int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14742
static int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with symmetric qua...
Definition: hpm_math.h:14658
static int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:15151
static int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with interleaved m...
Definition: hpm_math.h:15106
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14832
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with interleaved multiplica...
Definition: hpm_math.h:14222
static int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14700
static int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y, const int in_tensor_dim_x, const int out_tensor_dim_y, const int out_tensor_dim_x, const int stride_y, const int stride_x, const int ker_dim_y, const int ker_dim_x, const int pad_y, const int pad_x, const int act_min, const int act_max, const int in_tensor_ch, int8_t *in_tensor, int16_t *in_tmp_buf, int8_t *out_tensor)
This is an average pooling function for S8 inputs with any x and y dimension with the activation param...
Definition: hpm_math.h:15627
static void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15709
static int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y, const uint16_t in_tensor_dim_x, const uint16_t out_tensor_dim_y, const uint16_t out_tensor_dim_x, const uint16_t stride_y, const uint16_t stride_x, const uint16_t ker_dim_y, const uint16_t ker_dim_x, const uint16_t pad_y, const uint16_t pad_x, const int8_t act_min, const int8_t act_max, const uint16_t in_tensor_ch, int8_t *in_tensor, int16_t *tmp_buffer, int8_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs in any x and y dimensions with the act...
Definition: hpm_math.h:15754
static void hpm_nn_avepool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is an average pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15505
static int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
This function is used to obtain the required size, in bytes, for the input temporary buffer of riscv_...
Definition: hpm_math.h:15669
static void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q7_t *in_tmp_buf, q7_t *out_tensor, const uint16_t out_lshift)
This is an average pooling function for signed 8-bit integer inputs in any x and y dimensions.
Definition: hpm_math.h:15570
static void hpm_nn_softmax_s8_hp(const int8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, int8_t *out_tensor)
This is a softmax function for signed 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15956
static void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, uint8_t *out_tensor)
This is a softmax function for unsigned 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15987
static void hpm_nn_softmax_s8_fast(const q7_t *in_vec, const uint16_t size, q7_t *out_vec)
This is a softmax function for signed 8-bit integer input vectors.
Definition: hpm_math.h:15913
static void hpm_nn_softmax_s16_fast(const q15_t *in_vec, const uint16_t size, q15_t *out_vec)
This is a softmax function for signed 16-bit integer input vectors.
Definition: hpm_math.h:15931
static int32_t hpm_nn_top_k_s8(q7_t *in_vec, uint32_t size, uint32_t k, q7_t *val, uint32_t *idx)
This function finds the k largest values and their indices from the signed 8-bit integer input vector...
Definition: hpm_math.h:16134
static void hpm_nn_reshape_s8(const int8_t *in_tensor, int8_t *out_tensor, const uint32_t size)
This function turns the input tensor into another tensor with the same data but in a different shape.
Definition: hpm_math.h:16104
static void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t *instance, riscv_dsp_sort_order order, float32_t *buf)
Definition: hpm_math.h:6586
__STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
Read 4 s8 from s8 pointer and post increment pointer.
Definition: hpm_math.h:6782
static void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Generic sorting function.
Definition: hpm_math.h:6564
#define Q31_MIN
Definition: hpm_math.h:6646
#define RIGHT_SHIFT(_shift)
Definition: hpm_math.h:6644
#define LEFT_SHIFT(_shift)
Definition: hpm_math.h:6643
__STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
Definition: hpm_math.h:6792
__STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
Definition: hpm_math.h:6726
__STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
Rounding divide by power of two.
Definition: hpm_math.h:6707
#define Q31_MAX
Definition: hpm_math.h:6645
static void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Merge sort.
Definition: hpm_math.h:6626
__STATIC_FORCEINLINE const q7_t * read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
Read and expand one q7 word into two q15 words.
Definition: hpm_math.h:6765
__STATIC_FORCEINLINE const q7_t * read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
Read and expand one q7 word into two q15 words with reordering.
Definition: hpm_math.h:6751
__STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
Read 4 q7 from q7 pointer and post increment pointer.
Definition: hpm_math.h:6737
static void write_q15x2_ia(q15_t **pQ15, q31_t value)
Definition: hpm_math.h:6648
static void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t *instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
Definition: hpm_math.h:6517
__STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
Saturating doubling high multiply. Result matches NEON instruction VQRDMULH.
Definition: hpm_math.h:6682
__STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
Read 2 q15 elements and post increment pointer.
Definition: hpm_math.h:6664
static float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
Standard deviation of the floating-point vector.
Definition: hpm_math.h:565
static uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t *src, float32_t *buf)
Naive Gaussian Bayesian Estimator.
Definition: hpm_math.h:810
static float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
Variance of the floating-point vector.
Definition: hpm_math.h:656
static q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum value of the q15 vector.
Definition: hpm_math.h:120
static q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
Sum of the squares of the q15 vector.
Definition: hpm_math.h:422
static q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
Mean value of the q7 vector.
Definition: hpm_math.h:361
static q15_t hpm_dsp_absmin_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q15 vector.
Definition: hpm_math.h:908
static uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Max value of the u8 vector.
Definition: hpm_math.h:180
static q7_t hpm_dsp_absmin_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q7 vector.
Definition: hpm_math.h:922
static uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Minimum value of the u8 vector.
Definition: hpm_math.h:275
static q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum value of the q7 vector.
Definition: hpm_math.h:160
static float32_t hpm_dsp_absmin_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the floating-point vector.
Definition: hpm_math.h:880
static q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
RMS of the q31 vector.
Definition: hpm_math.h:545
static q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum value of the q7 vector.
Definition: hpm_math.h:255
static q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
Variance of the q31 vector.
Definition: hpm_math.h:708
static q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum value of the q31 vector.
Definition: hpm_math.h:140
static q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
Mean value of the q31 vector.
Definition: hpm_math.h:337
static q31_t hpm_dsp_absmax_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q31 vector.
Definition: hpm_math.h:852
static float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
Entropy of the floating-point vector.
Definition: hpm_math.h:729
static float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
Mean value of the floating-point vector.
Definition: hpm_math.h:289
static float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
RMS of the floating-point vector.
Definition: hpm_math.h:493
static float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:770
static q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
Sum of the squares of the q7 vector.
Definition: hpm_math.h:473
static q31_t hpm_dsp_absmin_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q31 vector.
Definition: hpm_math.h:894
static float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum value of the floating-point vector.
Definition: hpm_math.h:95
static float32_t hpm_dsp_absmax_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the floating-point vector.
Definition: hpm_math.h:824
static q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum value of the q31 vector.
Definition: hpm_math.h:235
static q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
Standard deviation of the u8 vector.
Definition: hpm_math.h:642
static q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum value of the q15 vector.
Definition: hpm_math.h:215
static q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
RMS of the q15 vector.
Definition: hpm_math.h:519
static float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum value of the floating-point vector.
Definition: hpm_math.h:195
static q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
Sum of the squares of the q31 vector.
Definition: hpm_math.h:448
static float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
Definition: hpm_math.h:107
static q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
Variance of the q15 vector.
Definition: hpm_math.h:682
static q7_t hpm_dsp_absmax_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q7 vector.
Definition: hpm_math.h:866
static q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
Mean value of the q15 vector.
Definition: hpm_math.h:313
static q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
Standard deviation of the q15 vector.
Definition: hpm_math.h:591
static float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
Dot product with Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:790
static q15_t hpm_dsp_absmax_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q15 vector.
Definition: hpm_math.h:838
static uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
Mean value of the u8 vector.
Definition: hpm_math.h:383
static q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
Standard deviation of the q31 vector.
Definition: hpm_math.h:617
static float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Relative Entropy of the floating-point vector.
Definition: hpm_math.h:752
static float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
Sum of the squares of the floating-point vector.
Definition: hpm_math.h:397
static void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
SVM linear prediction.
Definition: hpm_math.h:4857
static void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
SVM rbf prediction.
Definition: hpm_math.h:4885
static void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
SVM polynomial prediction.
Definition: hpm_math.h:4899
static void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
SVM Sigmoid prediction.
Definition: hpm_math.h:4871
static void hpm_dsp_cifft_q15(q15_t *src, uint32_t m)
cifft of q15 vectors.
Definition: hpm_math.h:5356
static void hpm_dsp_dct4_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5777
static int32_t hpm_dsp_cfft_rd2_f32(float32_t *src, uint32_t m)
cfft_rd2 of f32 vectors.
Definition: hpm_math.h:4955
static void hpm_dsp_cfft_f64(float64_t *src, uint32_t m)
cfft of f64 vectors.
Definition: hpm_math.h:5275
static void hpm_dsp_idct4_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5796
static void hpm_dsp_cifft_f32(float32_t *src, uint32_t m)
cifft of f32 vectors.
Definition: hpm_math.h:5292
static int32_t hpm_dsp_cifft_rd4_q15(q15_t *src, uint32_t m)
cifft_rd4 of q15 vectors.
Definition: hpm_math.h:5179
static int32_t hpm_dsp_rifft_f32(float32_t *src, uint32_t m)
rifft of f32 vectors.
Definition: hpm_math.h:5474
static int32_t hpm_dsp_cfft_rd4_q31(q31_t *src, uint32_t m)
cfft_rd4 of q31 vectors.
Definition: hpm_math.h:5203
static int32_t hpm_dsp_rifft_q15(q15_t *src, uint32_t m)
rifft of q15 vectors.
Definition: hpm_math.h:5528
static int32_t hpm_dsp_cifft_rd2_q31(q31_t *src, uint32_t m)
cifft_rd2 of q31 vectors.
Definition: hpm_math.h:5071
static void hpm_dsp_cfft_q15(q15_t *src, uint32_t m)
cfft of q15 vectors.
Definition: hpm_math.h:5333
static void hpm_dsp_cfft_f32(float32_t *src, uint32_t m)
cfft of f32 vectors.
Definition: hpm_math.h:5258
static int32_t hpm_dsp_rifft_q31(q31_t *src, uint32_t m)
rifft of q31 vectors.
Definition: hpm_math.h:5568
static void hpm_dsp_dct_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5663
static void hpm_dsp_idct_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5644
static void hpm_dsp_idct_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5606
static void hpm_dsp_idct4_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5720
static int32_t hpm_dsp_rfft_f64(float64_t *src, uint32_t m)
rfft of f64 vectors.
Definition: hpm_math.h:5460
static int32_t hpm_dsp_cfft_rd4_q15(q15_t *src, uint32_t m)
cfft_rd4 of q15 vectors.
Definition: hpm_math.h:5155
static void hpm_dsp_cifft_q31(q31_t *src, uint32_t m)
cifft of q31 vectors.
Definition: hpm_math.h:5402
static int32_t hpm_dsp_cifft_rd4_f32(float32_t *src, uint32_t m)
cifft_rd4 of f32 vectors.
Definition: hpm_math.h:5131
static void hpm_dsp_dct_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5593
static void hpm_dsp_cifft_f64(float64_t *src, uint32_t m)
cifft of f64 vectors.
Definition: hpm_math.h:5309
static void hpm_dsp_cfft_q31(q31_t *src, uint32_t m)
cfft of q31 vectors.
Definition: hpm_math.h:5379
static int32_t hpm_dsp_cifft_rd2_f32(float32_t *src, uint32_t m)
cifft_rd2 of f32 vectors.
Definition: hpm_math.h:4973
static int32_t hpm_dsp_cfft_rd2_q31(q31_t *src, uint32_t m)
cfft_rd2 of q31 vectors.
Definition: hpm_math.h:5046
static void hpm_dsp_idct_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5682
static void hpm_dsp_dct_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5625
static int32_t hpm_dsp_rfft_q31(q31_t *src, uint32_t m)
rfft of q31 vectors.
Definition: hpm_math.h:5548
static int32_t hpm_dsp_rfft_q15(q15_t *src, uint32_t m)
rfft of q15 vectors.
Definition: hpm_math.h:5508
static void hpm_dsp_idct4_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5758
static void hpm_dsp_dct4_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5739
static int32_t hpm_dsp_rifft_f64(float64_t *src, uint32_t m)
rifft of f64 vectors.
Definition: hpm_math.h:5488
static int32_t hpm_dsp_cfft_rd2_q15(q15_t *src, uint32_t m)
cfft_rd2 of q15 vectors.
Definition: hpm_math.h:4998
static int32_t hpm_dsp_cifft_rd4_q31(q31_t *src, uint32_t m)
cifft_rd4 of q31 vectors.
Definition: hpm_math.h:5227
void hpm_software_cfft_float(float *src, uint32_t m)
Software implementation does not depend on any hardware.
static int32_t hpm_dsp_cfft_rd4_f32(float32_t *src, uint32_t m)
cfft_rd4 of f32 vectors.
Definition: hpm_math.h:5112
static int32_t hpm_dsp_cifft_rd2_q15(q15_t *src, uint32_t m)
cifft_rd2 of q15 vectors.
Definition: hpm_math.h:5022
static int32_t hpm_dsp_rfft_f32(float32_t *src, uint32_t m)
rfft of f32 vectors.
Definition: hpm_math.h:5442
static void hpm_dsp_dct4_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5707
static void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
Duplicate the floating-point vector.
Definition: hpm_math.h:6264
static void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
Set the floating-point vector.
Definition: hpm_math.h:6317
static float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
Definition: hpm_math.h:6036
static void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
Convert a Q31 vector to Q15.
Definition: hpm_math.h:6198
static void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
Set the Q15 vector.
Definition: hpm_math.h:6330
static float32_t hpm_dsp_exp_f32(float32_t src)
Calculate the exponential value of an f32 input.
Definition: hpm_math.h:6399
static float32_t hpm_dsp_sin_f32(float32_t src)
Definition: hpm_math.h:5984
static float32_t hpm_dsp_sigmoid_f32(float32_t src)
Calculate the sigmoid value of an f32 input.
Definition: hpm_math.h:6425
static void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
Convert a Q31 vector to floating-point.
Definition: hpm_math.h:6181
static q15_t hpm_dsp_atan_q15(q15_t src)
Definition: hpm_math.h:6030
static q31_t hpm_dsp_sin_q31(q31_t src)
Definition: hpm_math.h:6004
static void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
Convert a floating-point vector to Q31.
Definition: hpm_math.h:6112
static q31_t hpm_dsp_cos_q31(q31_t src)
Definition: hpm_math.h:5971
static q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
Definition: hpm_math.h:6048
static void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
Duplicate the Q31 vector.
Definition: hpm_math.h:6290
static void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
Convert a Q15 vector to Q7.
Definition: hpm_math.h:6168
static void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
Set the Q31 vector.
Definition: hpm_math.h:6343
static q15_t hpm_dsp_sin_q15(q15_t src)
Definition: hpm_math.h:6010
static void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
Duplicate the Q15 vector.
Definition: hpm_math.h:6277
static void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
Convert a Q15 vector to Q31.
Definition: hpm_math.h:6155
static q15_t hpm_dsp_cos_q15(q15_t src)
Definition: hpm_math.h:5977
static float32_t hpm_dsp_cos_f32(float32_t src)
Definition: hpm_math.h:5965
static void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
Barycenter of the floating-point type.
Definition: hpm_math.h:6387
static q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
Definition: hpm_math.h:6042
static void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
Convert a Q7 vector to Q15.
Definition: hpm_math.h:6237
static void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
Convert a Q31 vector to Q7.
Definition: hpm_math.h:6211
static void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
Convert a floating-point vector to Q15.
Definition: hpm_math.h:6099
static void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
Set the Q7 vector.
Definition: hpm_math.h:6356
static q31_t hpm_dsp_atan_q31(q31_t src)
Definition: hpm_math.h:6024
static void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
Convert a Q7 vector to Q31.
Definition: hpm_math.h:6250
static void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
Convert a Q7 vector to floating-point.
Definition: hpm_math.h:6224
static float32_t hpm_dsp_log_f32(float32_t src)
Calculate the natural logarithm value of an f32 input.
Definition: hpm_math.h:6451
static void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
Convert a Q15 vector to floating-point.
Definition: hpm_math.h:6142
static float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
Weighted sum of the floating-point vector.
Definition: hpm_math.h:6371
static void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
Convert a floating-point vector to Q7.
Definition: hpm_math.h:6129
static q31_t hpm_dsp_sqrt_q31(q31_t src)
Square root of the q31 input.
Definition: hpm_math.h:6073
static float32_t hpm_dsp_sqrt_f32(float32_t src)
Square root of the floating-point input.
Definition: hpm_math.h:6061
static void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
Duplicate the Q7 vector.
Definition: hpm_math.h:6303
static q15_t hpm_dsp_sqrt_q15(q15_t src)
Square root of the q15 input.
Definition: hpm_math.h:6085
static float32_t hpm_dsp_atan_f32(float32_t src)
Definition: hpm_math.h:6018
uint32_t hpm_math_sw_reverse_bit32_msb_to_lsb(uint32_t msb)
Reverse the bit order of 32-bit data, MSB to LSB.
uint8_t hpm_math_sw_reverse_bit8_msb_to_lsb(uint8_t msb)
Reverse the bit order of 8-bit data, MSB to LSB.
uint32_t hpm_math_sw_reverse_bit32_lsb_to_msb(uint32_t lsb)
Reverse the bit order of 32-bit data, LSB to MSB.
uint8_t hpm_math_sw_reverse_bit8_lsb_to_msb(uint8_t lsb)
Reverse the bit order of 8-bit data, LSB to MSB.
FFT transform context.
Definition: hpm_ffa_drv.h:75
void * dst
Definition: hpm_ffa_drv.h:81
uint8_t dst_data_type
Definition: hpm_ffa_drv.h:78
uint8_t src_data_type
Definition: hpm_ffa_drv.h:77
const void * src
Definition: hpm_ffa_drv.h:80
uint32_t num_points
Definition: hpm_ffa_drv.h:79
uint16_t is_ifft
Definition: hpm_ffa_drv.h:76