HPM SDK
HPMicro Software Development Kit
hpm_math.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022,2024 HPMicro
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  */
7 
8 #ifndef __HPM_MATH_H__
9 #define __HPM_MATH_H__
10 
11 #include <stddef.h>
17 #define HPM_DSP_HW_NDS32 1 /* andes hardware dsp */
18 
19 #ifdef CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
20 #include CONFIG_HPM_MATH_HAS_EXTRA_CONFIG
21 #else
22 
23 /* Enable Compute Cell Library*/
24 /* #define HPM_EN_MATH_FFA_LIB */
25 /* #define HPM_EN_MATH_DSP_LIB */
26 /* #define HPM_EN_MATH_NN_LIB */
27 
28 #define HPM_MATH_DSP_STATISTICS 1
29 #define HPM_MATH_DSP_BASIC 1
30 #define HPM_MATH_DSP_COMPLEX 1
31 #define HPM_MATH_DSP_CONTROLLER 1
32 #define HPM_MATH_DSP_DISTANCE 1
33 #define HPM_MATH_DSP_FILTERING 1
34 #define HPM_MATH_DSP_MATRIX 1
35 #define HPM_MATH_DSP_SVM 1
36 #define HPM_MATH_DSP_TRANSFORM 1
37 #define HPM_MATH_DSP_UTILS 1
38 #define HPM_MATH_DSP_SORT 1
39 
40 #define HPM_MATH_NN_ACTIVATION 1
41 #define HPM_MATH_NN_TINYENGINE 1
42 #define HPM_MATH_NN_BASIC 1
43 #define HPM_MATH_NN_CONCATENATION 1
44 #define HPM_MATH_NN_CONVOLUTION 1
45 #define HPM_MATH_NN_CONNECTED 1
46 #define HPM_MATH_NN_POOLING 1
47 #define HPM_MATH_NN_SOFTMAX 1
48 #define HPM_MATH_NN_UTIL 1
49 
50 #define HPM_DSP_CORE HPM_DSP_HW_NDS32 /* DSP core selection */
51 
52 #define HPM_MATH_PI (3.14159265358979323846)
53 
62 #define HPM_MATH_SW_FFT_CHECKLIST
63 
64 #endif
65 
66 #ifdef __cplusplus
67 extern "C"
68 {
69 #endif
70 
71 #ifdef HPM_MATH_DSP_STATISTICS
72 
79 #ifdef HPM_EN_MATH_DSP_LIB
80 
81 #ifdef __zcc__
82 #include "tpt_math.h"
83 #endif
84 
85 #include "riscv_dsp_statistics_math.h"
86 
87 // Maximum
95 static inline float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
96 {
97 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
98 #ifdef __zcc__
99  f32_t res;
100  tpt_max_f32(&res, index, src, size);
101  return res;
102 #else
103  return riscv_dsp_max_f32(src, size, index);
104 #endif
105 #endif
106 }
107 static inline float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
108 {
109 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
110  return riscv_dsp_max_val_f32(src, size);
111 #endif
112 }
120 static inline q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
121 {
122 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
123 #ifdef __zcc__
124  q15_t res;
125  tpt_max_q15(&res, index, src, size);
126  return res;
127 #else
128  return riscv_dsp_max_q15(src, size, index);
129 #endif
130 #endif
131 }
132 
140 static inline q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
141 {
142 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
143  #ifdef __zcc__
144  q31_t res;
145  tpt_max_q31(&res, index, src, size);
146  return res;
147 #else
148  return riscv_dsp_max_q31(src, size, index);
149 #endif
150 #endif
151 }
152 
160 static inline q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
161 {
162 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
163 #ifdef __zcc__
164  q7_t res;
165  tpt_max_q7(&res, index, src, size);
166  return res;
167 #else
168  return riscv_dsp_max_q7(src, size, index);
169 #endif
170 #endif
171 }
172 
/**
 * @brief Maximum (uint8_t): calls riscv_dsp_max_u8(); there is no __zcc__ variant.
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled.
 * @param src,size,index forwarded unchanged to the backend
 * @return result reported by the backend
 */
static inline uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    const uint8_t peak = riscv_dsp_max_u8(src, size, index);

    return peak;
#endif
}
186 
187 // Minimum
195 static inline float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
196 {
197 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
198 #ifdef __zcc__
199  f32_t res;
200  tpt_min_f32(&res, index, src, size);
201  return res;
202 #else
203  return riscv_dsp_min_f32(src, size, index);
204 #endif
205 #endif
206 }
207 
215 static inline q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
216 {
217 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
218 #ifdef __zcc__
219  q15_t res;
220  tpt_min_q15(&res, index, src, size);
221  return res;
222 #else
223  return riscv_dsp_min_q15(src, size, index);
224 #endif
225 #endif
226 }
227 
235 static inline q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
236 {
237 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
238 #ifdef __zcc__
239  q31_t res;
240  tpt_min_q31(&res, index, src, size);
241  return res;
242 #else
243  return riscv_dsp_min_q31(src, size, index);
244 #endif
245 #endif
246 }
247 
255 static inline q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
256 {
257 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
258 #ifdef __zcc__
259  q7_t res;
260  tpt_min_q7(&res, index, src, size);
261  return res;
262 #else
263  return riscv_dsp_min_q7(src, size, index);
264 #endif
265 #endif
266 }
267 
/**
 * @brief Minimum (uint8_t): calls riscv_dsp_min_u8(); there is no __zcc__ variant.
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled.
 * @param src,size,index forwarded unchanged to the backend
 * @return result reported by the backend
 */
static inline uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    const uint8_t lowest = riscv_dsp_min_u8(src, size, index);

    return lowest;
#endif
}
281 
282 // Mean
289 static inline float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
290 {
291 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
292 #ifdef __zcc__
293  f32_t res;
294  tpt_mean_f32(&res, src, size);
295  return res;
296 #else
297  return riscv_dsp_mean_f32(src, size);
298 #endif
299 #endif
300 }
301 
313 static inline q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
314 {
315 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
316 #ifdef __zcc__
317  q15_t res;
318  tpt_mean_q15(&res, src, size);
319  return res;
320 #else
321  return riscv_dsp_mean_q15(src, size);
322 #endif
323 #endif
324 }
325 
337 static inline q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
338 {
339 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
340 #ifdef __zcc__
341  q31_t res;
342  tpt_mean_q31(&res, src, size);
343  return res;
344 #else
345  return riscv_dsp_mean_q31(src, size);
346 #endif
347 #endif
348 }
349 
361 static inline q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
362 {
363 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
364 #ifdef __zcc__
365  q7_t res;
366  tpt_mean_q7(&res, src, size);
367  return res;
368 #else
369  return riscv_dsp_mean_q7(src, size);
370 #endif
371 #endif
372 }
373 
/**
 * @brief Mean (uint8_t): calls riscv_dsp_mean_u8(); there is no __zcc__ variant.
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled.
 * @param src,size forwarded unchanged to the backend
 * @return result reported by the backend
 */
static inline uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    const uint8_t avg = riscv_dsp_mean_u8(src, size);

    return avg;
#endif
}
389 
390 // Sum of the Squares
397 static inline float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
398 {
399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
400 #ifdef __zcc__
401  f32_t res;
402  tpt_power_f32(&res, src, size);
403  return res;
404 #else
405  return riscv_dsp_pwr_f32(src, size);
406 #endif
407 #endif
408 }
409 
422 static inline q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
423 {
424 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
425 #ifdef __zcc__
426  q63_t res;
427  tpt_power_q15(&res, src, size);
428  return res;
429 #else
430  return riscv_dsp_pwr_q15(src, size);
431 #endif
432 #endif
433 }
434 
448 static inline q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
449 {
450 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
451 #ifdef __zcc__
452  q63_t res;
453  tpt_power_q31(&res, src, size);
454  return res;
455 #else
456  return riscv_dsp_pwr_q31(src, size);
457 #endif
458 #endif
459 }
460 
473 static inline q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
474 {
475 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
476 #ifdef __zcc__
477  q31_t res;
478  tpt_power_q7(&res, src, size);
479  return res;
480 #else
481  return riscv_dsp_pwr_q7(src, size);
482 #endif
483 #endif
484 }
485 
486 // Root Mean Square
493 static inline float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
494 {
495 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
496 #ifdef __zcc__
497  f32_t res;
498  tpt_rms_f32(&res, src, size);
499  return res;
500 #else
501  return riscv_dsp_rms_f32(src, size);
502 #endif
503 #endif
504 }
505 
519 static inline q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
520 {
521 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
522 #ifdef __zcc__
523  q15_t res;
524  tpt_rms_q15(&res, src, size);
525  return res;
526 #else
527  return riscv_dsp_rms_q15(src, size);
528 #endif
529 #endif
530 }
531 
545 static inline q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
546 {
547 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
548 #ifdef __zcc__
549  q31_t res;
550  tpt_rms_q31(&res, src, size);
551  return res;
552 #else
553  return riscv_dsp_rms_q31(src, size);
554 #endif
555 #endif
556 }
557 
558 // Standard deviation
565 static inline float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
566 {
567 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
568 #ifdef __zcc__
569  f32_t res;
570  tpt_std_f32(&res, src, size);
571  return res;
572 #else
573  return riscv_dsp_std_f32(src, size);
574 #endif
575 #endif
576 }
577 
591 static inline q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
592 {
593 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
594 #ifdef __zcc__
595  q15_t res;
596  tpt_std_q15(&res, src, size);
597  return res;
598 #else
599  return riscv_dsp_std_q15(src, size);
600 #endif
601 #endif
602 }
603 
617 static inline q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
618 {
619 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
620 #ifdef __zcc__
621  q31_t res;
622  tpt_std_q31(&res, src, size);
623  return res;
624 #else
625  return riscv_dsp_std_q31(src, size);
626 #endif
627 #endif
628 }
629 
642 static inline q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
643 {
644 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
645  return riscv_dsp_std_u8(src, size);
646 #endif
647 }
648 
649 // Variance
656 static inline float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
657 {
658 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
659 #ifdef __zcc__
660  f32_t res;
661  tpt_var_f32(&res, src, size);
662  return res;
663 #else
664  return riscv_dsp_var_f32(src, size);
665 #endif
666 #endif
667 }
668 
682 static inline q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
683 {
684 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
685 #ifdef __zcc__
686  q15_t res;
687  tpt_var_q15(&res, src, size);
688  return res;
689 #else
690  return riscv_dsp_var_q15(src, size);
691 #endif
692 #endif
693 }
694 
708 static inline q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
709 {
710 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
711 #ifdef __zcc__
712  q31_t res;
713  tpt_var_q31(&res, src, size);
714  return res;
715 #else
716  return riscv_dsp_var_q31(src, size);
717 #endif
718 #endif
719 }
720 
729 static inline float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
730 {
731 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
732 #ifdef __zcc__
733  return tpt_entropy_f32(src, size);
734 #else
735  return riscv_dsp_entropy_f32(src, size);
736 #endif
737 #endif
738 }
739 
740 
752 static inline float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
753 {
754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
755 #ifdef __zcc__
756  return tpt_relative_entropy_f32(src1, src2, size);
757 #else
758  return riscv_dsp_relative_entropy_f32(src1, src2, size);
759 #endif
760 #endif
761 }
762 
770 static inline float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
771 {
772 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
773 #ifdef __zcc__
774  tpt_lse_f32(src, size);
775 #else
776  return riscv_dsp_lse_f32(src, size);
777 #endif
778 #endif
779 }
780 
790 static inline float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
791 {
792 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
793 #ifdef __zcc__
794  return tpt_lse_dprod_f32(src1, src2, size, buffer);
795 #else
796  return riscv_dsp_lse_dprod_f32(src1, src2, size, buffer);
797 #endif
798 #endif
799 }
800 
810 static inline uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t * src, float32_t *buf)
811 {
812 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
813  return riscv_dsp_gaussian_naive_bayes_est_f32(instance, src, buf);
814 #endif
815 }
816 
824 static inline float32_t hpm_dsp_absmax_f32(const float32_t* src, uint32_t size, uint32_t* index)
825 {
826 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
827  return riscv_dsp_absmax_f32(src, size, index);
828 #endif
829 }
830 
838 static inline q15_t hpm_dsp_absmax_q15(const q15_t* src, uint32_t size, uint32_t* index)
839 {
840 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
841  return riscv_dsp_absmax_q15(src, size, index);
842 #endif
843 }
844 
852 static inline q31_t hpm_dsp_absmax_q31(const q31_t* src, uint32_t size, uint32_t* index)
853 {
854 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
855  return riscv_dsp_absmax_q31(src, size, index);
856 #endif
857 }
858 
866 static inline q7_t hpm_dsp_absmax_q7(const q7_t* src, uint32_t size, uint32_t* index)
867 {
868 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
869  return riscv_dsp_absmax_q7(src, size, index);
870 #endif
871 }
872 
880 static inline float32_t hpm_dsp_absmin_f32(const float32_t* src, uint32_t size, uint32_t* index)
881 {
882 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
883  return riscv_dsp_absmin_f32(src, size, index);
884 #endif
885 }
886 
894 static inline q31_t hpm_dsp_absmin_q31(const q31_t* src, uint32_t size, uint32_t* index)
895 {
896 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
897  return riscv_dsp_absmin_q31(src, size, index);
898 #endif
899 }
900 
908 static inline q15_t hpm_dsp_absmin_q15(const q15_t* src, uint32_t size, uint32_t* index)
909 {
910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
911  return riscv_dsp_absmin_q15(src, size, index);
912 #endif
913 }
914 
922 static inline q7_t hpm_dsp_absmin_q7(const q7_t* src, uint32_t size, uint32_t* index)
923 {
924 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
925  return riscv_dsp_absmin_q7(src, size, index);
926 #endif
927 }
928 
929 #endif
930 #endif
931 
937 #ifdef HPM_MATH_DSP_BASIC
938 
944 #ifdef HPM_EN_MATH_DSP_LIB
945 
946 #ifdef __zcc__
947 #include "tpt_math.h"
948 #endif
949 
950 #include "riscv_dsp_basic_math.h"
951 
952 // Absolute value
959 static inline void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
960 {
961 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
962 #ifdef __zcc__
963  tpt_abs_f32(dst, src, size);
964 #else
965  riscv_dsp_abs_f32(src, dst, size);
966 #endif
967 #endif
968 }
969 
979 static inline void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
980 {
981 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
982 #ifdef __zcc__
983  tpt_abs_q31(dst, src, size);
984 #else
985  riscv_dsp_abs_q31(src, dst, size);
986 #endif
987 
988 #endif
989 }
990 
1000 static inline void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
1001 {
1002 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1003 #ifdef __zcc__
1004  tpt_abs_q15(dst, src, size);
1005 #else
1006  riscv_dsp_abs_q15(src, dst, size);
1007 #endif
1008 #endif
1009 }
1010 
1020 static inline void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
1021 {
1022 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1023 #ifdef __zcc__
1024  tpt_abs_q7(dst, src, size);
1025 #else
1026  riscv_dsp_abs_q7(src, dst, size);
1027 #endif
1028 #endif
1029 }
1030 
1031 // Addition
1039 static inline void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1040 {
1041 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1042 #ifdef __zcc__
1043  tpt_add_f32(dst, src1, src2, size);
1044 #else
1045  riscv_dsp_add_f32(src1, src2, dst, size);
1046 #endif
1047 #endif
1048 }
1049 
1059 static inline void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
1060 {
1061 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1062 #ifdef __zcc__
1063  tpt_add_q31(dst, src1, src2, size);
1064 #else
1065  riscv_dsp_add_q31(src1, src2, dst, size);
1066 #endif
1067 #endif
1068 }
1069 
1079 static inline void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
1080 {
1081 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1082 #ifdef __zcc__
1083  tpt_add_q15(dst, src1, src2, size);
1084 #else
1085  riscv_dsp_add_q15(src1, src2, dst, size);
1086 #endif
1087 #endif
1088 }
1089 
1099 static inline void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
1100 {
1101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1102 #ifdef __zcc__
1103  tpt_add_q7(dst, src1, src2, size);
1104 #else
1105  riscv_dsp_add_q7(src1, src2, dst, size);
1106 #endif
1107 #endif
1108 }
1109 
/**
 * @brief Addition (uint8_t inputs, uint16_t output): tpt_add_u8_u16() under __zcc__,
 *        else riscv_dsp_add_u8_u16().
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled; otherwise this is a no-op.
 * @param src1,src2,dst,size forwarded to the backend
 */
static inline void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32) && defined(__zcc__)
    /* Argument order differs: dst is passed first to the tpt call. */
    tpt_add_u8_u16(dst, src1, src2, size);
#elif (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    riscv_dsp_add_u8_u16(src1, src2, dst, size);
#endif
}
1129 
1130 // Subtraction
1138 static inline void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1139 {
1140 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1141 #ifdef __zcc__
1142  tpt_sub_f32(dst, src1, src2, size);
1143 #else
1144  riscv_dsp_sub_f32(src1, src2, dst, size);
1145 #endif
1146 #endif
1147 }
1148 
1158 static inline void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
1159 {
1160 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1161 #ifdef __zcc__
1162  tpt_sub_q31(dst, src1, src2, size);
1163 #else
1164  riscv_dsp_sub_q31(src1, src2, dst, size);
1165 #endif
1166 #endif
1167 }
1168 
1178 static inline void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
1179 {
1180 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1181 #ifdef __zcc__
1182  tpt_sub_q15(dst, src1, src2, size);
1183 #else
1184  riscv_dsp_sub_q15(src1, src2, dst, size);
1185 #endif
1186 #endif
1187 }
1188 
1198 static inline void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
1199 {
1200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1201 #ifdef __zcc__
1202  tpt_sub_q7(dst, src1, src2, size);
1203 #else
1204  riscv_dsp_sub_q7(src1, src2, dst, size);
1205 #endif
1206 #endif
1207 }
1208 
1218 static inline void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
1219 {
1220 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1221  riscv_dsp_sub_u8_q7(src1, src2, dst, size);
1222 #endif
1223 }
1224 
1225 // Multiplication
1233 static inline void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1234 {
1235 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1236 #ifdef __zcc__
1237  tpt_mult_f32(dst, src1, src2, size);
1238 #else
1239  riscv_dsp_mul_f32(src1, src2, dst, size);
1240 #endif
1241 #endif
1242 }
1243 
1253 static inline void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
1254 {
1255 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1256 #ifdef __zcc__
1257  tpt_mult_q31(dst, src1, src2, size);
1258 #else
1259  riscv_dsp_mul_q31(src1, src2, dst, size);
1260 #endif
1261 #endif
1262 }
1263 
1273 static inline void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
1274 {
1275 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1276 #ifdef __zcc__
1277  tpt_mult_q15(dst, src1, src2, size);
1278 #else
1279  riscv_dsp_mul_q15(src1, src2, dst, size);
1280 #endif
1281 #endif
1282 }
1283 
1293 static inline void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
1294 {
1295 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1296 #ifdef __zcc__
1297  tpt_mult_q7(dst, src1, src2, size);
1298 #else
1299  riscv_dsp_mul_q7(src1, src2, dst, size);
1300 #endif
1301 #endif
1302 }
1303 
/**
 * @brief Multiplication (uint8_t inputs, uint16_t output): calls riscv_dsp_mul_u8_u16();
 *        there is no __zcc__ variant.
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled; otherwise this is a no-op.
 * @param src1,src2,dst,size forwarded unchanged to the backend
 */
static inline void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    riscv_dsp_mul_u8_u16(src1, src2, dst, size);
#endif
}
1319 
1320 // Division
1328 static inline void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
1329 {
1330 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1331 #ifdef __zcc__
1332  tpt_div_f32(dst, src1, src2, size);
1333 #else
1334  riscv_dsp_div_f32(src1, src2, dst, size);
1335 #endif
1336 #endif
1337 }
1338 
1345 static inline q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
1346 {
1347 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1348 #ifdef __zcc__
1349  return tpt_div_q31(src1, src2);
1350 #else
1351  return riscv_dsp_div_q31(src1, src2);
1352 #endif
1353 #endif
1354 }
1355 
1362 static inline q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
1363 {
1364 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1365 #ifdef __zcc__
1366  return tpt_div_s64_u32(src1, src2);
1367 #else
1368  return riscv_dsp_div_s64_u32(src1, src2);
1369 #endif
1370 #endif
1371 }
1372 
1379 static inline q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
1380 {
1381 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1382 #ifdef __zcc__
1383  return tpt_div_u64_u32(src1, src2);
1384 #else
1385  return riscv_dsp_div_u64_u32(src1, src2);
1386 #endif
1387 #endif
1388 }
1389 
1390 // Negation
1397 static inline void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
1398 {
1399 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1400 #ifdef __zcc__
1401  tpt_negate_f32(dst, src, size);
1402 #else
1403  riscv_dsp_neg_f32(src, dst, size);
1404 #endif
1405 #endif
1406 }
1407 
1417 static inline void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
1418 {
1419 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1420 #ifdef __zcc__
1421  tpt_negate_q31(dst, src, size);
1422 #else
1423  riscv_dsp_neg_q31(src, dst, size);
1424 #endif
1425 #endif
1426 }
1427 
1437 static inline void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
1438 {
1439 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1440 #ifdef __zcc__
1441  tpt_negate_q15(dst, src, size);
1442 #else
1443  riscv_dsp_neg_q15(src, dst, size);
1444 #endif
1445 #endif
1446 }
1447 
1457 static inline void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
1458 {
1459 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1460 #ifdef __zcc__
1461  tpt_negate_q7(dst, src, size);
1462 #else
1463  riscv_dsp_neg_q7(src, dst, size);
1464 #endif
1465 #endif
1466 }
1467 
1468 // Dot Product
1476 static inline float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
1477 {
1478 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1479 #ifdef __zcc__
1480  f32_t res;
1481  tpt_dot_prod_f32(&res, src1, src2, size);
1482  return res;
1483 #else
1484  return riscv_dsp_dprod_f32(src1, src2, size);
1485 #endif
1486 #endif
1487 }
1488 
1501 static inline q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
1502 {
1503 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1504 #ifdef __zcc__
1505  q63_t res;
1506  tpt_dot_prod_q31(&res, src1, src2, size);
1507  return res;
1508 #else
1509  return riscv_dsp_dprod_q31(src1, src2, size);
1510 #endif
1511 #endif
1512 }
1513 
1524 static inline q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
1525 {
1526 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1527 #ifdef __zcc__
1528  q63_t res;
1529  tpt_dot_prod_q15(&res, src1, src2, size);
1530  return res;
1531 #else
1532  return riscv_dsp_dprod_q15(src1, src2, size);
1533 #endif
1534 #endif
1535 }
1536 
1548 static inline q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
1549 {
1550 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1551  return riscv_dsp_dprod_u8xq15(src1, src2, size);
1552 #endif
1553 }
1554 
1555 
1566 static inline q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
1567 {
1568 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1569 #ifdef __zcc__
1570  q31_t res;
1571  tpt_dot_prod_q7(&res, src1, src2, size);
1572  return res;
1573 #else
1574  return riscv_dsp_dprod_q7(src1, src2, size);
1575 #endif
1576 #endif
1577 }
1578 
1589 static inline q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
1590 {
1591 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1592  return riscv_dsp_dprod_q7xq15(src1, src2, size);
1593 #endif
1594 }
1595 
/**
 * @brief Dot product (uint8_t inputs, uint32_t result): calls riscv_dsp_dprod_u8();
 *        there is no __zcc__ variant.
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled.
 * @param src1,src2,size forwarded unchanged to the backend
 * @return value returned by the backend call
 */
static inline uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    const uint32_t dot = riscv_dsp_dprod_u8(src1, src2, size);

    return dot;
#endif
}
1612 
1613 // Offset
1621 static inline void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
1622 {
1623 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1624 #ifdef __zcc__
1625  tpt_offset_f32(dst, src, offset, size);
1626 #else
1627  riscv_dsp_offset_f32(src, offset, dst, size);
1628 #endif
1629 #endif
1630 }
1631 
1641 static inline void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
1642 {
1643 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1644 #ifdef __zcc__
1645  tpt_offset_q31(dst, src, offset, size);
1646 #else
1647  riscv_dsp_offset_q31(src, offset, dst, size);
1648 #endif
1649 #endif
1650 }
1651 
1661 static inline void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
1662 {
1663 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1664 #ifdef __zcc__
1665  tpt_offset_q15(dst, src, offset, size);
1666 #else
1667  riscv_dsp_offset_q15(src, offset, dst, size);
1668 #endif
1669 #endif
1670 }
1671 
1681 static inline void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
1682 {
1683 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1684 #ifdef __zcc__
1685  tpt_offset_q7(dst, src, offset, size);
1686 #else
1687  riscv_dsp_offset_q7(src, offset, dst, size);
1688 #endif
1689 #endif
1690 }
1691 
1701 static inline void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
1702 {
1703 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1704  riscv_dsp_offset_u8(src, offset, dst, size);
1705 #endif
1706 }
1707 
1708 // Scale
1716 static inline void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
1717 {
1718 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1719 #ifdef __zcc__
1720  tpt_scale_f32(dst, src, scale, size);
1721 #else
1722  riscv_dsp_scale_f32(src, scale, dst, size);
1723 #endif
1724 #endif
1725 }
1726 
1739 static inline void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
1740 {
1741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1742 #ifdef __zcc__
1743  tpt_scale_q31(dst, src, scalefract, shift, size);
1744 #else
1745  riscv_dsp_scale_q31(src, scalefract, shift, dst, size);
1746 #endif
1747 #endif
1748 }
1749 
1762 static inline void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
1763 {
1764 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1765 #ifdef __zcc__
1766  tpt_scale_q15(dst, src, scalefract, shift, size);
1767 #else
1768  riscv_dsp_scale_q15(src, scalefract, shift, dst, size);
1769 #endif
1770 #endif
1771 }
1772 
1785 static inline void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
1786 {
1787 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1788 #ifdef __zcc__
1789  tpt_scale_q7(dst, src, scalefract, shift, size);
1790 #else
1791  riscv_dsp_scale_q7(src, scalefract, shift, dst, size);
1792 #endif
1793 #endif
1794 }
1795 
1807 static inline void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
1808 {
1809 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1810  riscv_dsp_scale_u8(src, scalefract, shift, dst, size);
1811 #endif
1812 }
1813 
1814 // Shift
1825 static inline void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
1826 {
1827 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1828 #ifdef __zcc__
1829  tpt_shift_q15(dst, src, shift, size);
1830 #else
1831  riscv_dsp_shift_q15(src, shift, dst, size);
1832 #endif
1833 #endif
1834 }
1835 
1846 static inline void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
1847 {
1848 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1849 #ifdef __zcc__
1850  tpt_shift_q31(dst, src, shift, size);
1851 #else
1852  riscv_dsp_shift_q31(src, shift, dst, size);
1853 #endif
1854 #endif
1855 }
1856 
1867 static inline void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
1868 {
1869 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1870 #ifdef __zcc__
1871  tpt_shift_q7(dst, src, shift, size);
1872 #else
1873  riscv_dsp_shift_q7(src, shift, dst, size);
1874 #endif
1875 #endif
1876 }
1877 
/**
 * @brief Shift (uint8_t): calls riscv_dsp_shift_u8(); there is no __zcc__ variant.
 * @note Only HPM_DSP_CORE == HPM_DSP_HW_NDS32 is handled; otherwise this is a no-op.
 * @param src,shift,dst,size forwarded unchanged to the backend
 */
static inline void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
{
#if (HPM_DSP_CORE == HPM_DSP_HW_NDS32)
    riscv_dsp_shift_u8(src, shift, dst, size);
#endif
}
1894 
1908 static inline void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
1909 {
1910 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1911 #ifdef __zcc__
1912  tpt_clip_f32(dst, src, low, high, size);
1913 #else
1914  riscv_dsp_clip_f32(src, dst, low, high, size);
1915 #endif
1916 #endif
1917 }
1927 static inline void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
1928 {
1929 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1930 #ifdef __zcc__
1931  tpt_clip_q31(dst, src, low, high, size);
1932 #else
1933  riscv_dsp_clip_q31(src, dst, low, high, size);
1934 #endif
1935 #endif
1936 }
1946 static inline void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
1947 {
1948 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1949 #ifdef __zcc__
1950  tpt_clip_q15(dst, src, low, high, size);
1951 #else
1952  riscv_dsp_clip_q15(src, dst, low, high, size);
1953 #endif
1954 #endif
1955 }
1965 static inline void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
1966 {
1967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
1968 #ifdef __zcc__
1969  tpt_clip_q7(dst, src, low, high, size);
1970 #else
1971  riscv_dsp_clip_q7(src, dst, low, high, size);
1972 #endif
1973 #endif
1974 }
1977 // AND
1998 static inline void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
1999 {
2000 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2001 #ifdef __zcc__
2002  tpt_and_32bit(dst, src1, src2, size);
2003 #else
2004  riscv_dsp_and_u32(src1, src2, dst, size);
2005 #endif
2006 #endif
2007 }
2016 static inline void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
2017 {
2018 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2019 #ifdef __zcc__
2020  tpt_and_8bit(dst, src1, src2, size);
2021 #else
2022  riscv_dsp_and_u8(src1, src2, dst, size);
2023 #endif
2024 #endif
2025 }
2028 // OR
2049 static inline void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
2050 {
2051 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2052 #ifdef __zcc__
2053  tpt_or_32bit(dst, src1, src2, size);
2054 #else
2055  riscv_dsp_or_u32(src1, src2, dst, size);
2056 #endif
2057 #endif
2058 }
2067 static inline void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
2068 {
2069 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2070 #ifdef __zcc__
2071  tpt_or_16bit(dst, src1, src2, size);
2072 #else
2073  riscv_dsp_or_u16(src1, src2, dst, size);
2074 #endif
2075 #endif
2076 }
2085 static inline void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
2086 {
2087 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2088 #ifdef __zcc__
2089  tpt_or_8bit(dst, src1, src2, size);
2090 #else
2091  riscv_dsp_or_u8(src1, src2, dst, size);
2092 #endif
2093 #endif
2094 }
2097 // XOR
2118 static inline void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
2119 {
2120 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2121 #ifdef __zcc__
2122  tpt_xor_32bit(dst, src1, src2, size);
2123 #else
2124  riscv_dsp_xor_u32(src1, src2, dst, size);
2125 #endif
2126 #endif
2127 }
2136 static inline void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
2137 {
2138 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2139 #ifdef __zcc__
2140  tpt_xor_16bit(dst, src1, src2, size);
2141 #else
2142  riscv_dsp_xor_u16(src1, src2, dst, size);
2143 #endif
2144 #endif
2145 }
2154 static inline void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
2155 {
2156 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2157 #ifdef __zcc__
2158  tpt_xor_8bit(dst, src1, src2, size);
2159 #else
2160  riscv_dsp_xor_u8(src1, src2, dst, size);
2161 #endif
2162 #endif
2163 }
2166 // Not
2186 static inline void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
2187 {
2188 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2189 #ifdef __zcc__
2190  tpt_not_32bit(dst, src, size);
2191 #else
2192  riscv_dsp_not_u32(src, dst, size);
2193 #endif
2194 #endif
2195 }
2203 static inline void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
2204 {
2205 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2206 #ifdef __zcc__
2207  tpt_not_16bit(dst, src, size);
2208 #else
2209  riscv_dsp_not_u16(src, dst, size);
2210 #endif
2211 #endif
2212 }
2220 static inline void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
2221 {
2222 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2223 #ifdef __zcc__
2224  tpt_not_8bit(dst, src, size);
2225 #else
2226  riscv_dsp_not_u8(src, dst, size);
2227 #endif
2228 #endif
2229 }
2234 #endif
2235 
2236 #include <stdint.h>
2237 
2245 
2253 
2261 
2269 
2270 #endif
2271 
2272 #ifdef HPM_MATH_DSP_COMPLEX
2273 
2283 #ifdef HPM_EN_MATH_DSP_LIB
2284 
2285 #ifdef __zcc__
2286 #include "tpt_math.h"
2287 #endif
2288 
2289 #include "riscv_dsp_complex_math.h"
2290 
2291 // Complex Conjugate
2298 static inline void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
2299 {
2300 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2301 #ifdef __zcc__
2302  tpt_cmplx_conj_f32(dst, src, size);
2303 #else
2304  riscv_dsp_cconj_f32(src, dst, size);
2305 #endif
2306 #endif
2307 }
2308 
2318 static inline void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
2319 {
2320 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2321 #ifdef __zcc__
2322  tpt_cmplx_conj_q15(dst, src, size);
2323 #else
2324  riscv_dsp_cconj_q15(src, dst, size);
2325 #endif
2326 #endif
2327 }
2328 
2338 static inline void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
2339 {
2340 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2341 #ifdef __zcc__
2342  tpt_cmplx_conj_q31(dst, src, size);
2343 #else
2344  riscv_dsp_cconj_q31(src, dst, size);
2345 #endif
2346 #endif
2347 }
2348 
2349 // Complex Dot Product
2357 static inline void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
2358 {
2359 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2360  riscv_dsp_cdprod_f32(src1, src2, size, dst);
2361 #endif
2362 }
2363 
2372 static inline void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
2373 {
2374 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2375 #ifdef __zcc__
2376  tpt_cmplx_dot_prod_f32(rout, iout, src1, src2, size);
2377 #else
2378  riscv_dsp_cdprod_typ2_f32(src1, src2, size, rout, iout);
2379 #endif
2380 #endif
2381 }
2382 
2393 static inline void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
2394 {
2395 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2396  riscv_dsp_cdprod_q15(src1, src2, size, dst);
2397 #endif
2398 }
2399 
2411 static inline void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
2412 {
2413 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2414  riscv_dsp_cdprod_typ2_q15(src1, src2, size, rout, iout);
2415 #endif
2416 }
2417 
2428 static inline void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
2429 {
2430 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2431  riscv_dsp_cdprod_q31(src1, src2, size, dst);
2432 #endif
2433 }
2434 
2435 
2447 static inline void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
2448 {
2449 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2450 #ifdef __zcc__
2451  tpt_cmplx_dot_prod_q31(rout, iout, src1, src2, size);
2452 #else
2453  riscv_dsp_cdprod_typ2_q31(src1, src2, size, rout, iout);
2454 #endif
2455 #endif
2456 }
2457 
2458 // Complex Magnitude
2465 static inline void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
2466 {
2467 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2468 #ifdef __zcc__
2469  tpt_cmplx_mag_f32(dst, src, size);
2470 #else
2471  riscv_dsp_cmag_f32(src, dst, size);
2472 #endif
2473 #endif
2474 }
2475 
2485 static inline void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
2486 {
2487 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2488 #ifdef __zcc__
2489  tpt_cmplx_mag_q15(dst, src, size);
2490 #else
2491  riscv_dsp_cmag_q15(src, dst, size);
2492 #endif
2493 #endif
2494 }
2495 
2505 static inline void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
2506 {
2507 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2508 #ifdef __zcc__
2509  tpt_cmplx_mag_q31(dst, src, size);
2510 #else
2511  riscv_dsp_cmag_q31(src, dst, size);
2512 #endif
2513 #endif
2514 }
2515 
2516 // Complex Magnitude Squared
2524 static inline void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
2525 {
2526 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2527 #ifdef __zcc__
2528  tpt_cmplx_mag_squared_f32(dst, src, size);
2529 #else
2530  riscv_dsp_cmag_sqr_f32(src, dst, size);
2531 #endif
2532 #endif
2533 }
2534 
2544 static inline void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
2545 {
2546 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2547 #ifdef __zcc__
2548  tpt_cmplx_mag_squared_q15(dst, src, size);
2549 #else
2550  riscv_dsp_cmag_sqr_q15(src, dst, size);
2551 #endif
2552 #endif
2553 }
2554 
2564 static inline void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
2565 {
2566 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2567 #ifdef __zcc__
2568  tpt_cmplx_mag_squared_q31(dst, src, size);
2569 #else
2570  riscv_dsp_cmag_sqr_q31(src, dst, size);
2571 #endif
2572 #endif
2573 }
2574 
2575 // Complex Multiplication
2583 static inline void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
2584 {
2585 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2586 #ifdef __zcc__
2587  tpt_cmplx_mult_cmplx_f32(dst, src1, src2, size);
2588 #else
2589  riscv_dsp_cmul_f32(src1, src2, dst, size);
2590 #endif
2591 #endif
2592 }
2593 
2604 static inline void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
2605 {
2606 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2607 #ifdef __zcc__
2608  tpt_cmplx_mult_cmplx_q15(dst, src1, src2, size);
2609 #else
2610  riscv_dsp_cmul_q15(src1, src2, dst, size);
2611 #endif
2612 #endif
2613 }
2614 
2625 static inline void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
2626 {
2627 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2628 #ifdef __zcc__
2629  tpt_cmplx_mult_cmplx_q31(dst, src1, src2, size);
2630 #else
2631  riscv_dsp_cmul_q31(src1, src2, dst, size);
2632 #endif
2633 #endif
2634 }
2635 
2636 // Complex-by-Real Multiplication
2644 static inline void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
2645 {
2646 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2647 #ifdef __zcc__
2648  tpt_cmplx_mult_real_f32(dst, src, real, size);
2649 #else
2650  riscv_dsp_cmul_real_f32(src, real, dst, size);
2651 #endif
2652 #endif
2653 }
2654 
2664 static inline void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
2665 {
2666 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2667 #ifdef __zcc__
2668  tpt_cmplx_mult_real_q15(dst, src, real, size);
2669 #else
2670  riscv_dsp_cmul_real_q15(src, real, dst, size);
2671 #endif
2672 #endif
2673 }
2674 
2684 static inline void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
2685 {
2686 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2687 #ifdef __zcc__
2688  tpt_cmplx_mult_real_q31(dst, src, real, size);
2689 #else
2690  riscv_dsp_cmul_real_q31(src, real, dst, size);
2691 #endif
2692 #endif
2693 }
2694 #endif
2695 #endif
2696 
2702 #ifdef HPM_MATH_DSP_CONTROLLER
2703 
2709 #ifdef HPM_EN_MATH_DSP_LIB
2710 
2711 #include "riscv_dsp_controller_math.h"
2712 
2713 // Clarke Transform
2721 static inline void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
2722 {
2723 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2724  riscv_dsp_clarke_f32(a, b, alpha, beta);
2725 #endif
2726 }
2737 static inline void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
2738 {
2739 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2740  riscv_dsp_clarke_q31(a, b, alpha, beta);
2741 #endif
2742 }
2743 
2744 // Inverse Clarke Transform
2752 static inline void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
2753 {
2754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2755  riscv_dsp_inv_clarke_f32(alpha, beta, a, b);
2756 #endif
2757 }
2768 static inline void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
2769 {
2770 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2771  riscv_dsp_inv_clarke_q31(alpha, beta, a, b);
2772 #endif
2773 }
2774 
2775 // Park Transform
2785 static inline void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
2786 {
2787 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2788  riscv_dsp_park_f32(alpha, beta, a, b, sin, cos);
2789 #endif
2790 }
2791 
2804 static inline void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
2805 {
2806 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2807  riscv_dsp_park_q31(alpha, beta, a, b, sin, cos);
2808 #endif
2809 }
2810 
2811 // Inverse Park Transform
2821 static inline void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
2822 {
2823 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2824  riscv_dsp_inv_park_f32(a, b, alpha, beta, sin, cos);
2825 #endif
2826 }
2839 static inline void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
2840 {
2841 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2842  riscv_dsp_inv_park_q31(a, b, alpha, beta, sin, cos);
2843 #endif
2844 }
2852 static inline float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
2853 {
2854 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2855  return riscv_dsp_pid_f32(instance, src);
2856 #endif
2857 }
2870 static inline void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
2871 {
2872 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2873  riscv_dsp_init_pid_f32(instance, set);
2874 #endif
2875 }
2876 
2884 static inline q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
2885 {
2886 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2887  return riscv_dsp_pid_q31(instance, src);
2888 #endif
2889 }
2890 
2903 static inline void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
2904 {
2905 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2906  riscv_dsp_init_pid_q31(instance, set);
2907 #endif
2908 }
2909 
2910 static inline q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
2911 {
2912 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2913  return riscv_dsp_pid_q15(instance, src);
2914 #endif
2915 }
2928 static inline void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
2929 {
2930 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2931  riscv_dsp_init_pid_q15(instance, set);
2932 #endif
2933 }
2934 #endif
2935 #endif
2936 
2942 #ifdef HPM_MATH_DSP_DISTANCE
2943 
2949 #ifdef HPM_EN_MATH_DSP_LIB
2950 #ifdef __zcc__
2951 #include "tpt_math.h"
2952 #endif
2953 #include "riscv_dsp_distance_math.h"
2954 
2955 
2963 static inline float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
2964 {
2965 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2966 #ifdef __zcc__
2967  return tpt_braycurtis_distance_f32(src1, src2, size);
2968 #else
2969  return riscv_dsp_dist_bray_curtis_f32(src1, src2, size);
2970 #endif
2971 #endif
2972 }
2973 
2981 static inline float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
2982 {
2983 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
2984 #ifdef __zcc__
2985  return tpt_canberra_distance_f32(src1, src2, size);
2986 #else
2987  return riscv_dsp_dist_canberra_f32(src1, src2, size);
2988 #endif
2989 #endif
2990 }
2991 
2999 static inline float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3000 {
3001 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3002 #ifdef __zcc__
3003  return tpt_chebyshev_distance_f32(src1, src2, size);
3004 #else
3005  return riscv_dsp_dist_chebyshev_f32(src1, src2, size);
3006 #endif
3007 #endif
3008 }
3009 
3017 static inline float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3018 {
3019 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3020 #ifdef __zcc__
3021  return tpt_cityblock_distance_f32(src1, src2, size);
3022 #else
3023  return riscv_dsp_dist_city_block_f32(src1, src2, size);
3024 #endif
3025 #endif
3026 }
3027 
3035 static inline float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3036 {
3037 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3038 #ifdef __zcc__
3039  return tpt_correlation_distance_f32(src1, src2, size);
3040 #else
3041  return riscv_dsp_dist_corr_f32(src1, src2, size);
3042 #endif
3043 #endif
3044 }
3045 
3053 static inline float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3054 {
3055 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3056 #ifdef __zcc__
3057  return tpt_cosine_distance_f32(src1, src2, size);
3058 #else
3059  return riscv_dsp_dist_cos_f32(src1, src2, size);
3060 #endif
3061 #endif
3062 }
3063 
3071 static inline float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3072 {
3073 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3074 #ifdef __zcc__
3075  return tpt_euclidean_distance_f32(src1, src2, size);
3076 #else
3077  return riscv_dsp_dist_euclidean_f32(src1, src2, size);
3078 #endif
3079 #endif
3080 }
3081 
3089 static inline float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
3090 {
3091 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3092 #ifdef __zcc__
3093  return tpt_jensenshannon_distance_f32(src1, src2, size);
3094 #else
3095  return riscv_dsp_dist_jensen_shannon_f32(src1, src2, size);
3096 #endif
3097 #endif
3098 }
3099 
3108 static inline float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
3109 {
3110 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3111 #ifdef __zcc__
3112  return tpt_minkowski_distance_f32(src1, src2, order, size);
3113 #else
3114  return riscv_dsp_dist_minkowski_f32(src1, src2, order, size);
3115 #endif
3116 #endif
3117 }
3118 
3126 static inline float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3127 {
3128 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3129 #ifdef __zcc__
3130  return tpt_dice_distance(src1, src2, numofbool);
3131 #else
3132  return riscv_dsp_bdist_dice_u32_f32(src1, src2, numofbool);
3133 #endif
3134 #endif
3135 }
3136 
3144 static inline float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3145 {
3146 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3147 #ifdef __zcc__
3148  return tpt_hamming_distance(src1, src2, numofbool);
3149 #else
3150  return riscv_dsp_bdist_hamming_u32_f32(src1, src2, numofbool);
3151 #endif
3152 #endif
3153 }
3154 
3162 static inline float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3163 {
3164 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3165 #ifdef __zcc__
3166  return tpt_jaccard_distance(src1, src2, numofbool);
3167 #else
3168  return riscv_dsp_bdist_jaccard_u32_f32(src1, src2, numofbool);
3169 #endif
3170 #endif
3171 }
3172 
3180 static inline float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3181 {
3182 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3183 #ifdef __zcc__
3184  return tpt_kulsinski_distance(src1, src2, numofbool);
3185 #else
3186  return riscv_dsp_bdist_kulsinski_u32_f32(src1, src2, numofbool);
3187 #endif
3188 #endif
3189 }
3190 
3198 static inline float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3199 {
3200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3201 #ifdef __zcc__
3202  return tpt_sokalmichener_distance(src1, src2, numofbool);
3203 #else
3204  return riscv_dsp_bdist_sokal_michener_u32_f32(src1, src2, numofbool);
3205 #endif
3206 #endif
3207 }
3208 
3216 static inline float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3217 {
3218 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3219 #ifdef __zcc__
3220  return tpt_sokalsneath_distance(src1, src2, numofbool);
3221 #else
3222  return riscv_dsp_bdist_sokal_sneath_u32_f32(src1, src2, numofbool);
3223 #endif
3224 #endif
3225 }
3226 
3234 static inline float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3235 {
3236 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3237 #ifdef __zcc__
3238  return tpt_rogerstanimoto_distance(src1, src2, numofbool);
3239 #else
3240  return riscv_dsp_bdist_rogers_tanimoto_u32_f32(src1, src2, numofbool);
3241 #endif
3242 #endif
3243 }
3244 
3252 static inline float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3253 {
3254 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3255 #ifdef __zcc__
3256  return tpt_yule_distance(src1, src2, numofbool);
3257 #else
3258  return riscv_dsp_bdist_yule_u32_f32(src1, src2, numofbool);
3259 #endif
3260 #endif
3261 }
3262 
3270 static inline float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
3271 {
3272 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3273 #ifdef __zcc__
3274  return tpt_russellrao_distance(src1, src2, numofbool);
3275 #else
3276  return riscv_dsp_bdist_russell_rao_u32_f32(src1, src2, numofbool);
3277 #endif
3278 #endif
3279 }
3280 #endif
3281 #endif
3282 
3288 #ifdef HPM_MATH_DSP_FILTERING
3289 
3295 #ifdef HPM_EN_MATH_DSP_LIB
3296 
3297 #ifdef __zcc__
3298 #include "tpt_math.h"
3299 #endif
3300 
3301 #include "riscv_dsp_filtering_math.h"
3302 
3310 static inline void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3311 {
3312 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3313  riscv_dsp_fir_f32(instance, src, dst, size);
3314 #endif
3315 }
3316 
3331 static inline void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3332 {
3333 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3334  riscv_dsp_fir_q31(instance, src, dst, size);
3335 #endif
3336 }
3337 
3352 static inline void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3353 {
3354 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3355  riscv_dsp_fir_fast_q31(instance, src, dst, size);
3356 #endif
3357 }
3372 static inline void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3373 {
3374 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3375  riscv_dsp_fir_q15(instance, src, dst, size);
3376 #endif
3377 }
3378 
3391 static inline void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3392 {
3393 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3394  riscv_dsp_fir_fast_q15(instance, src, dst, size);
3395 #endif
3396 }
3410 static inline void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
3411 {
3412 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3413  riscv_dsp_fir_q7(instance, src, dst, size);
3414 #endif
3415 }
3416 
3425 static inline void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3426 {
3427 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3428  riscv_dsp_lfir_f32(instance, src, dst, size);
3429 #endif
3430 }
3431 
3440 static inline void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3441 {
3442 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3443  riscv_dsp_lfir_q15(instance, src, dst, size);
3444 #endif
3445 }
3446 
3459 static inline void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3460 {
3461 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3462  riscv_dsp_lfir_q31(instance, src, dst, size);
3463 #endif
3464 }
3465 static inline void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3466 {
3467 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3468  riscv_dsp_dcmfir_f32(instance, src, dst, size);
3469 #endif
3470 }
3471 static inline void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3472 {
3473 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3474  riscv_dsp_dcmfir_q15(instance, src, dst, size);
3475 #endif
3476 }
3477 static inline void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3478 {
3479 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3480  riscv_dsp_dcmfir_q31(instance, src, dst, size);
3481 #endif
3482 }
3483 static inline void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3484 {
3485 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3486  riscv_dsp_dcmfir_fast_q31(instance, src, dst, size);
3487 #endif
3488 }
3489 static inline void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3490 {
3491 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3492  riscv_dsp_dcmfir_fast_q15(instance, src, dst, size);
3493 #endif
3494 }
3495 static inline void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3496 {
3497 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3498  riscv_dsp_upsplfir_f32(instance, src, dst, size);
3499 #endif
3500 }
3501 static inline void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3502 {
3503 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3504  riscv_dsp_upsplfir_q15(instance, src, dst, size);
3505 #endif
3506 }
3507 static inline void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3508 {
3509 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3510  riscv_dsp_upsplfir_q31(instance, src, dst, size);
3511 #endif
3512 }
3513 static inline void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
3514 {
3515 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3516  riscv_dsp_spafir_f32(instance, src, dst, buf, size);
3517 #endif
3518 }
3519 static inline void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
3520 {
3521 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3522  riscv_dsp_spafir_q15(instance, src, dst, buf1, buf2, size);
3523 #endif
3524 }
3525 static inline void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
3526 {
3527 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3528  riscv_dsp_spafir_q31(instance, src, dst, buf, size);
3529 #endif
3530 }
3531 static inline void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
3532 {
3533 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3534  riscv_dsp_spafir_q7(instance, src, dst, buf1, buf2, size);
3535 #endif
3536 }
3537 
3538 // Standard LMS filter
3552 static inline void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
3553 {
3554 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3555  riscv_dsp_lms_f32(instance, src, ref, dst, err, size);
3556 #endif
3557 }
3558 
3575 static inline void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
3576 {
3577 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3578  riscv_dsp_lms_q31(instance, src, ref, dst, err, size);
3579 #endif
3580 }
3581 
3598 static inline void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
3599 {
3600 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3601  riscv_dsp_lms_q15(instance, src, ref, dst, err, size);
3602 #endif
3603 }
3604 
3609 static inline void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
3610 {
3611 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3612  riscv_dsp_nlms_f32(instance, src, ref, dst, err, size);
3613 #endif
3614 }
3615 
3616 
3621 static inline void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
3622 {
3623 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3624  riscv_dsp_nlms_q31(instance, src, ref, dst, err, size);
3625 #endif
3626 }
3627 
3628 
3629 static inline void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
3630 {
3631 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3632  riscv_dsp_nlms_q15(instance, src, ref, dst, err, size);
3633 #endif
3634 }
3635 
3636 
3637 // Convolution
3647 static inline void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3648 {
3649 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3650 #ifdef __zcc__
3651  tpt_conv_f32(dst, src1, len1, src2, len2);
3652 #else
3653  riscv_dsp_conv_f32(src1, len1, src2, len2, dst);
3654 #endif
3655 #endif
3656 }
3657 
3673 static inline void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3674 {
3675 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3676 #ifdef __zcc__
3677  tpt_conv_q15(dst, src1, len1, src2, len2);
3678 #else
3679  riscv_dsp_conv_q15(src1, len1, src2, len2, dst);
3680 #endif
3681 #endif
3682 }
3683 
3701 static inline void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3702 {
3703 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3704 #ifdef __zcc__
3705  tpt_conv_q31(dst, src1, len1, src2, len2);
3706 #else
3707  riscv_dsp_conv_q31(src1, len1, src2, len2, dst);
3708 #endif
3709 #endif
3710 }
3711 
3727 static inline void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3728 {
3729 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3730 #ifdef __zcc__
3731  tpt_conv_q7(dst, src1, len1, src2, len2);
3732 #else
3733  riscv_dsp_conv_q7(src1, len1, src2, len2, dst);
3734 #endif
3735 #endif
3736 }
3737 
3752 static inline int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
3753 {
3754 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3755 #ifdef __zcc__
3756  return tpt_conv_partial_f32(dst, src1, len1, src2, len2, startindex, size);
3757 #else
3758  return riscv_dsp_conv_partial_f32(src1, len1, src2, len2, dst, startindex,
3759  size);
3760 #endif
3761 #endif
3762 }
3763 
3778 static inline int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
3779 {
3780 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3781 #ifdef __zcc__
3782  return tpt_conv_partial_q15(dst, src1, len1, src2, len2, startindex, size);
3783 #else
3784  return riscv_dsp_conv_partial_q15(src1, len1, src2, len2, dst, startindex,
3785  size);
3786 #endif
3787 #endif
3788 }
3789 
3804 static inline int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
3805 {
3806 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3807 #ifdef __zcc__
3808  return tpt_conv_partial_q31(dst, src1, len1, src2, len2, startindex, size);
3809 #else
3810  return riscv_dsp_conv_partial_q31(src1, len1, src2, len2, dst, startindex,
3811  size);
3812 #endif
3813 #endif
3814 }
3815 
3830 static inline int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
3831 {
3832 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3833 #ifdef __zcc__
3834  return tpt_conv_partial_q7(dst, src1, len1, src2, len2, startindex, size);
3835 #else
3836  return riscv_dsp_conv_partial_q7(src1, len1, src2, len2, dst, startindex,
3837  size);
3838 #endif
3839 #endif
3840 }
3841 
3842 
3843 // Correlation
3853 static inline void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
3854 {
3855 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3856 #ifdef __zcc__
3857  tpt_correlate_f32(dst, src1, len1, src2, len2);
3858 #else
3859  riscv_dsp_corr_f32(src1, len1, src2, len2, dst);
3860 #endif
3861 #endif
3862 }
3863 
3879 static inline void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
3880 {
3881 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3882 #ifdef __zcc__
3883  tpt_correlate_q15(dst, src1, len1, src2, len2);
3884 #else
3885  riscv_dsp_corr_q15(src1, len1, src2, len2, dst);
3886 #endif
3887 #endif
3888 }
3889 
3909 static inline void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
3910 {
3911 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3912 #ifdef __zcc__
3913  tpt_correlate_q31(dst, src1, len1, src2, len2);
3914 #else
3915  riscv_dsp_corr_q31(src1, len1, src2, len2, dst);
3916 #endif
3917 #endif
3918 }
3919 
3935 static inline void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
3936 {
3937 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3938 #ifdef __zcc__
3939  tpt_correlate_q7(dst, src1, len1, src2, len2);
3940 #else
3941  riscv_dsp_corr_q7(src1, len1, src2, len2, dst);
3942 #endif
3943 #endif
3944 }
3945 static inline void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3946 {
3947 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3948  riscv_dsp_bq_df1_f32(instance, src, dst, size);
3949 #endif
3950 }
3951 static inline void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3952 {
3953 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3954  riscv_dsp_bq_df1_q15(instance, src, dst, size);
3955 #endif
3956 }
3957 static inline void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
3958 {
3959 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3960  riscv_dsp_bq_df1_fast_q15(instance, src, dst, size);
3961 #endif
3962 }
3963 static inline void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3964 {
3965 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3966  riscv_dsp_bq_df1_q31(instance, src, dst, size);
3967 #endif
3968 }
3969 static inline void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3970 {
3971 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3972  riscv_dsp_bq_df1_fast_q31(instance, src, dst, size);
3973 #endif
3974 }
3975 static inline void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
3976 {
3977 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3978  riscv_dsp_bq_df1_32x64_q31(instance, src, dst, size);
3979 #endif
3980 }
3981 static inline void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3982 {
3983 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3984  riscv_dsp_bq_df2T_f32(instance, src, dst, size);
3985 #endif
3986 }
3987 static inline void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
3988 {
3989 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3990  riscv_dsp_bq_df2T_f64(instance, src, dst, size);
3991 #endif
3992 }
3993 static inline void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
3994 {
3995 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
3996  riscv_dsp_bq_stereo_df2T_f32(instance, src, dst, size);
3997 #endif
3998 }
3999 
4000 static inline void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
4001 {
4002 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4003  riscv_dsp_liir_f32(instance, src, dst, size);
4004 #endif
4005 }
4006 static inline void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
4007 {
4008 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4009  riscv_dsp_liir_q31(instance, src, dst, size);
4010 #endif
4011 }
4012 static inline void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
4013 {
4014 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4015  riscv_dsp_liir_fast_q31(instance, src, dst, size);
4016 #endif
4017 }
4018 static inline void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
4019 {
4020 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4021  riscv_dsp_liir_q15(instance, src, dst, size);
4022 #endif
4023 }
4024 static inline void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
4025 {
4026 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4027  riscv_dsp_liir_fast_q15(instance, src, dst, size);
4028 #endif
4029 }
4030 #endif
4031 #endif
4032 
4038 #ifdef HPM_MATH_DSP_MATRIX
4039 
4063 #ifdef HPM_EN_MATH_DSP_LIB
4064 #ifdef __zcc__
4065 #include "tpt_math.h"
4066 #endif
4067 #include "riscv_dsp_matrix_math.h"
4068 
4069 // Matrix Addition
4078 static inline void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4079 {
4080 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4081 #ifdef __zcc__
4082  tpt_mat_add_f32(dst, src1, src2, row, col);
4083 #else
4084  riscv_dsp_mat_add_f32(src1, src2, dst, row, col);
4085 #endif
4086 #endif
4087 }
4088 
4099 static inline void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4100 {
4101 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4102 #ifdef __zcc__
4103  tpt_mat_add_q15(dst, src1, src2, row, col);
4104 #else
4105  riscv_dsp_mat_add_q15(src1, src2, dst, row, col);
4106 #endif
4107 #endif
4108 }
4109 
4120 static inline void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4121 {
4122 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4123 #ifdef __zcc__
4124  tpt_mat_add_q31(dst, src1, src2, row, col);
4125 #else
4126  riscv_dsp_mat_add_q31(src1, src2, dst, row, col);
4127 #endif
4128 #endif
4129 }
4130 
4131 // Matrix Inverse
4139 static inline int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
4140 {
4141 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4142 #ifdef __zcc__
4143  return tpt_mat_inverse_f32(dst, src, size);
4144 #else
4145  return riscv_dsp_mat_inv_f32(src, dst, size);
4146 #endif
4147 #endif
4148 }
4149 static inline int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
4150 {
4151 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4152 #ifdef __zcc__
4153  return tpt_mat_inverse_f64(dst, src, size);
4154 #else
4155  return riscv_dsp_mat_inv_f64(src, dst, size);
4156 #endif
4157 #endif
4158 }
4159 
4160 // Matrix Multiplication
4170 static inline void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4171 {
4172 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4173 #ifdef __zcc__
4174  return tpt_mat_mult_f32(dst, src1, src2, row, col, col2);
4175 #else
4176  riscv_dsp_mat_mul_f32(src1, src2, dst, row, col, col2);
4177 #endif
4178 #endif
4179 }
4180 
4181 static inline void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4182 {
4183 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4184 #ifdef __zcc__
4185  return tpt_mat_mult_f64(dst, src1, src2, row, col, col2);
4186 #else
4187  riscv_dsp_mat_mul_f64(src1, src2, dst, row, col, col2);
4188 #endif
4189 #endif
4190 }
4191 
4201 static inline void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4202 {
4203 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4204 #ifdef __zcc__
4205  return tpt_mat_cmplx_mult_f32(dst, src1, src2, row, col, col2);
4206 #else
4207  riscv_dsp_cmat_mul_f32(src1, src2, dst, row, col, col2);
4208 #endif
4209 #endif
4210 }
4211 
4228 static inline void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4229 {
4230 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4231 #ifdef __zcc__
4232  return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4233 #else
4234  riscv_dsp_mat_mul_q15(src1, src2, dst, row, col, col2);
4235 #endif
4236 #endif
4237 }
4238 static inline void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4239 {
4240 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4241 #ifdef __zcc__
4242  return tpt_mat_mult_q15(dst, src1, src2, row, col, col2);
4243 #else
4244  riscv_dsp_mat_mul_fast_q15(src1, src2, dst, row, col, col2);
4245 #endif
4246 #endif
4247 }
4248 
4265 static inline void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4266 {
4267 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4268 #ifdef __zcc__
4269  return tpt_mat_cmplx_mult_q15(dst, src1, src2, row, col, col2);
4270 #else
4271  riscv_dsp_cmat_mul_q15(src1, src2, dst, row, col, col2);
4272 #endif
4273 #endif
4274 }
4275 
4292 static inline void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4293 {
4294 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4295 #ifdef __zcc__
4296  return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4297 #else
4298  riscv_dsp_mat_mul_q31(src1, src2, dst, row, col, col2);
4299 #endif
4300 #endif
4301 }
4302 static inline void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4303 {
4304 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4305 #ifdef __zcc__
4306  return tpt_mat_mult_q31(dst, src1, src2, row, col, col2);
4307 #else
4308  riscv_dsp_mat_mul_fast_q31(src1, src2, dst, row, col, col2);
4309 #endif
4310 #endif
4311 }
4312 
4329 static inline void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4330 {
4331 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4332 #ifdef __zcc__
4333  return tpt_mat_cmplx_mult_q31(dst, src1, src2, row, col, col2);
4334 #else
4335  riscv_dsp_cmat_mul_q31(src1, src2, dst, row, col, col2);
4336 #endif
4337 #endif
4338 }
4339 
4356 static inline void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
4357 {
4358 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4359  riscv_dsp_mat_mul_q7(src1, src2, dst, row, col, col2);
4360 #endif
4361 }
4362 
4378 static inline void hpm_dsp_mat_mul_vxm_q7(const q7_t * src1, const q7_t * src2, q7_t * dst, uint32_t col, uint32_t col2)
4379 {
4380 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4381 #ifdef __zcc__
4382  tpt_mat_mul_mxv_q7(dst, src1, src2, col, col2);
4383 #else
4384  riscv_dsp_mat_mul_vxm_q7(src1, src2, dst, col, col2);
4385 #endif
4386 #endif
4387 }
4388 
4389 // Matrix Power 2 Function
4390 //
4391 // The input is a square matrix for riscv_dsp_mat_pwr2_cache_f64.
4392 static inline int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
4393 {
4394 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4395  return riscv_dsp_mat_pwr2_cache_f64(src, dst, size);
4396 #endif
4397 }
4398 
4399 // Matrix Scale
4408 static inline void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
4409 {
4410 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4411 #ifdef __zcc__
4412  tpt_mat_scale_f32(dst, src, row, col, scale);
4413 #else
4414  riscv_dsp_mat_scale_f32(src, scale, dst, row, col);
4415 #endif
4416 #endif
4417 }
4418 
4433 static inline void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
4434 {
4435 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4436 #ifdef __zcc__
4437  tpt_mat_scale_q15(dst, src, row, col, scale_fract, shift);
4438 #else
4439  riscv_dsp_mat_scale_q15(src, scale_fract, shift, dst, row, col);
4440 #endif
4441 #endif
4442 }
4443 
4458 static inline void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
4459 {
4460 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4461 #ifdef __zcc__
4462  tpt_mat_scale_q31(dst, src, row, col, scale_fract, shift);
4463 #else
4464  riscv_dsp_mat_scale_q31(src, scale_fract, shift, dst, row, col);
4465 #endif
4466 #endif
4467 }
4468 
4469 // Matrix Subtraction
4470 
4480 static inline void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2,
4481  float64_t *dst, uint32_t row, uint32_t col)
4482 {
4483 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4484 #ifdef __zcc__
4485  tpt_mat_sub_f64(dst, src1, src2, row, col);
4486 #else
4487  riscv_dsp_mat_sub_f64(src1, src2, dst, row, col);
4488 #endif
4489 #endif
4490 }
4491 
4500 static inline void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
4501 {
4502 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4503 #ifdef __zcc__
4504  tpt_mat_sub_f32(dst, src1, src2, row, col);
4505 #else
4506  riscv_dsp_mat_sub_f32(src1, src2, dst, row, col);
4507 #endif
4508 #endif
4509 }
4510 
4521 static inline void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
4522 {
4523 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4524 #ifdef __zcc__
4525  tpt_mat_sub_q15(dst, src1, src2, row, col);
4526 #else
4527  riscv_dsp_mat_sub_q15(src1, src2, dst, row, col);
4528 #endif
4529 #endif
4530 }
4531 
4542 static inline void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
4543 {
4544 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4545 #ifdef __zcc__
4546  tpt_mat_sub_q31(dst, src1, src2, row, col);
4547 #else
4548  riscv_dsp_mat_sub_q31(src1, src2, dst, row, col);
4549 #endif
4550 #endif
4551 }
4552 
4553 // Matrix Transpose
4554 
4563 static inline void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
4564 {
4565 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4566 #ifdef __zcc__
4567  tpt_mat_trans_f64(dst, src, row, col);
4568 #else
4569  riscv_dsp_mat_trans_f64(src, dst, row, col);
4570 #endif
4571 #endif
4572 }
4573 
4581 static inline void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
4582 {
4583 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4584  riscv_dsp_mat_trans_f32(src, dst, row, col);
4585 #endif
4586 }
4587 
4595 static inline void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
4596 {
4597 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4598 #ifdef __zcc__
4599  tpt_mat_trans_q15(dst, src, row, col);
4600 #else
4601  riscv_dsp_mat_trans_q15(src, dst, row, col);
4602 #endif
4603 #endif
4604 }
4605 
4613 static inline void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
4614 {
4615 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4616 #ifdef __zcc__
4617  tpt_mat_trans_q31(dst, src, row, col);
4618 #else
4619  riscv_dsp_mat_trans_q31(src, dst, row, col);
4620 #endif
4621 #endif
4622 }
4623 
/**
 * @brief Matrix transpose wrapper (uint8_t): forwards its arguments to
 *        riscv_dsp_mat_trans_u8().
 *
 * There is no __zcc__-specific branch. The call is compiled only when
 * HPM_DSP_CORE equals HPM_DSP_HW_NDS32; otherwise the body is empty.
 */
static inline void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst,
                                        uint32_t row, uint32_t col)
{
#if HPM_DSP_HW_NDS32 == HPM_DSP_CORE
    riscv_dsp_mat_trans_u8(src, dst, row, col);
#endif
}
4637 
4646 static inline void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
4647 {
4648 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4649  riscv_dsp_mat_trans_q7(src, dst, row, col);
4650 #endif
4651 }
4652 
4684 static inline void hpm_dsp_mat_oprod_q31(const q31_t * src1, const q31_t * src2,
4685  q31_t * dst, uint32_t size1, uint32_t size2)
4686 {
4687 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4688 #ifdef __zcc__
4689  tpt_mat_oprod_q31(dst, src1, src2, size1, size2);
4690 #else
4691  riscv_dsp_mat_oprod_q31(src1, src2, dst, size1, size2);
4692 #endif
4693 #endif
4694 }
4695 
4718 static inline void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2,
4719  float32_t *dst, uint32_t row, uint32_t col)
4720 {
4721 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4722 #ifdef __zcc__
4723  tpt_mat_mul_mxv_f32(dst, src1, src2, row, col);
4724 #else
4725  riscv_dsp_mat_mul_mxv_f32(src1, src2, dst, row, col);
4726 #endif
4727 #endif
4728 }
4738 static inline void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2,
4739  q15_t *dst, uint32_t row, uint32_t col)
4740 {
4741 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4742 #ifdef __zcc__
4743  tpt_mat_mul_mxv_q15(dst, src1, src2, row, col);
4744 #else
4745  riscv_dsp_mat_mul_mxv_q15(src1, src2, dst, row, col);
4746 #endif
4747 #endif
4748 }
4758 static inline void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2,
4759  q31_t *dst, uint32_t row, uint32_t col)
4760 {
4761 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4762 #ifdef __zcc__
4763  tpt_mat_mul_mxv_q31(dst, src1, src2, row, col);
4764 #else
4765  riscv_dsp_mat_mul_mxv_q31(src1, src2, dst, row, col);
4766 #endif
4767 #endif
4768 }
4778 static inline void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2,
4779  q7_t *dst, uint32_t row, uint32_t col)
4780 {
4781 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4782 #ifdef __zcc__
4783  tpt_mat_mul_mxv_q7(dst, src1, src2, row, col);
4784 #else
4785  riscv_dsp_mat_mul_mxv_q7(src1, src2, dst, row, col);
4786 #endif
4787 #endif
4788 }
4789 
4790 #endif
4791 #endif
4792 
4798 #ifdef HPM_MATH_DSP_SVM
4799 
4806 #ifdef HPM_EN_MATH_DSP_LIB
4807 #ifdef __zcc__
4808 #include "tpt_math.h"
4809 #endif
4810 #include "riscv_dsp_svm_math.h"
4818 static inline void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
4819 {
4820 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4821  riscv_dsp_svm_linear_est_f32(instance, src, result);
4822 #endif
4823 }
4824 
4832 static inline void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
4833 {
4834 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4835  riscv_dsp_svm_sigmoid_est_f32(instance, src, result);
4836 #endif
4837 }
4838 
4846 static inline void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
4847 {
4848 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4849  riscv_dsp_svm_rbf_est_f32(instance, src, result);
4850 #endif
4851 }
4852 
4860 static inline void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
4861 {
4862 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4863  riscv_dsp_svm_poly_est_f32(instance, src, result);
4864 #endif
4865 }
4866 
4867 #endif
4868 #endif
4869 
4875 #ifdef HPM_MATH_DSP_TRANSFORM
4876 
4882 #ifdef HPM_EN_MATH_DSP_LIB
4883 #ifdef __zcc__
4884 #include "tpt_math.h"
4885 #endif
4886 #include "riscv_dsp_transform_math.h"
4916 static inline int32_t hpm_dsp_cfft_rd2_f32(float32_t *src, uint32_t m)
4917 {
4918 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4919 #ifdef __zcc__
4920  return tpt_cfft_f32(src, m, false);
4921 #else
4922  return riscv_dsp_cfft_rd2_f32(src, m);
4923 #endif
4924 #endif
4925 }
4926 
4934 static inline int32_t hpm_dsp_cifft_rd2_f32(float32_t *src, uint32_t m)
4935 {
4936 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4937 #ifdef __zcc__
4938  return tpt_cfft_f32(src, m, true);
4939 #else
4940  return riscv_dsp_cifft_rd2_f32(src, m);
4941 #endif
4942 
4943 #endif
4944 }
4945 
4959 static inline int32_t hpm_dsp_cfft_rd2_q15(q15_t *src, uint32_t m)
4960 {
4961 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4962 #ifdef __zcc__
4963  return tpt_cfft_q15(src, m, false);
4964 #else
4965  return riscv_dsp_cfft_rd2_q15(src, m);
4966 #endif
4967 #endif
4968 }
4969 
4983 static inline int32_t hpm_dsp_cifft_rd2_q15(q15_t *src, uint32_t m)
4984 {
4985 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
4986 #ifdef __zcc__
4987  return tpt_cfft_q15(src, m, true);
4988 #else
4989  return riscv_dsp_cifft_rd2_q15(src, m);
4990 #endif
4991 #endif
4992 }
4993 
5007 static inline int32_t hpm_dsp_cfft_rd2_q31(q31_t *src, uint32_t m)
5008 {
5009 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5010 #ifdef __zcc__
5011  return tpt_cfft_q31(src, m, false);
5012 #else
5013  return riscv_dsp_cfft_rd2_q31(src, m);
5014 #endif
5015 
5016 #endif
5017 }
5018 
5032 static inline int32_t hpm_dsp_cifft_rd2_q31(q31_t *src, uint32_t m)
5033 {
5034 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5035 #ifdef __zcc__
5036  return tpt_cfft_q31(src, m, true);
5037 #else
5038  return riscv_dsp_cifft_rd2_q31(src, m);
5039 #endif
5040 
5041 #endif
5042 }
5043 
5073 static inline int32_t hpm_dsp_cfft_rd4_f32(float32_t *src, uint32_t m)
5074 {
5075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5076 #ifdef __zcc__
5077  return tpt_cfft_f32(src, m, false);
5078 #else
5079  return riscv_dsp_cfft_rd4_f32(src, m);
5080 #endif
5081 
5082 #endif
5083 }
5084 
5092 static inline int32_t hpm_dsp_cifft_rd4_f32(float32_t *src, uint32_t m)
5093 {
5094 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5095 #ifdef __zcc__
5096  return tpt_cfft_f32(src, m, true);
5097 #else
5098  return riscv_dsp_cifft_rd4_f32(src, m);
5099 #endif
5100 #endif
5101 }
5102 
5116 static inline int32_t hpm_dsp_cfft_rd4_q15(q15_t *src, uint32_t m)
5117 {
5118 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5119 #ifdef __zcc__
5120  return tpt_cfft_q15(src, m, false);
5121 #else
5122  return riscv_dsp_cfft_rd4_q15(src, m);
5123 #endif
5124 #endif
5125 }
5126 
5140 static inline int32_t hpm_dsp_cifft_rd4_q15(q15_t *src, uint32_t m)
5141 {
5142 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5143 #ifdef __zcc__
5144  return tpt_cfft_q15(src, m, true);
5145 #else
5146  return riscv_dsp_cifft_rd4_q15(src, m);
5147 #endif
5148 #endif
5149 }
5150 
5164 static inline int32_t hpm_dsp_cfft_rd4_q31(q31_t *src, uint32_t m)
5165 {
5166 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5167 #ifdef __zcc__
5168  return tpt_cfft_q31(src, m, false);
5169 #else
5170  return riscv_dsp_cfft_rd4_q31(src, m);
5171 #endif
5172 #endif
5173 }
5174 
5188 static inline int32_t hpm_dsp_cifft_rd4_q31(q31_t *src, uint32_t m)
5189 {
5190 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5191 #ifdef __zcc__
5192  return tpt_cfft_q31(src, m, true);
5193 #else
5194  return riscv_dsp_cifft_rd4_q31(src, m);
5195 #endif
5196 #endif
5197 }
5198 
5219 static inline void hpm_dsp_cfft_f32(float32_t *src, uint32_t m)
5220 {
5221 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5222 #ifdef __zcc__
5223  tpt_cfft_f32(src, m, false);
5224 #else
5225  riscv_dsp_cfft_f32(src, m);
5226 #endif
5227 #endif
5228 }
5229 
5236 static inline void hpm_dsp_cfft_f64(float64_t *src, uint32_t m)
5237 {
5238 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5239 #ifdef __zcc__
5240  tpt_cfft_f64(src, m, false);
5241 #else
5242  riscv_dsp_cfft_f64(src, m);
5243 #endif
5244 #endif
5245 }
5246 
5253 static inline void hpm_dsp_cifft_f32(float32_t *src, uint32_t m)
5254 {
5255 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5256 #ifdef __zcc__
5257  tpt_cfft_f32(src, m, true);
5258 #else
5259  riscv_dsp_cifft_f32(src, m);
5260 #endif
5261 #endif
5262 }
5263 
5270 static inline void hpm_dsp_cifft_f64(float64_t *src, uint32_t m)
5271 {
5272 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5273 #ifdef __zcc__
5274  tpt_cfft_f64(src, m, true);
5275 #else
5276  riscv_dsp_cifft_f64(src, m);
5277 #endif
5278 #endif
5279 }
5280 
5281 
5294 static inline void hpm_dsp_cfft_q15(q15_t *src, uint32_t m)
5295 {
5296 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5297 #ifdef __zcc__
5298  tpt_cfft_q15(src, m, false);
5299 #else
5300  riscv_dsp_cfft_q15(src, m);
5301 #endif
5302 #endif
5303 }
5304 
5317 static inline void hpm_dsp_cifft_q15(q15_t *src, uint32_t m)
5318 {
5319 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5320 #ifdef __zcc__
5321  tpt_cfft_q15(src, m, true);
5322 #else
5323  riscv_dsp_cifft_q15(src, m);
5324 #endif
5325 #endif
5326 }
5327 
5340 static inline void hpm_dsp_cfft_q31(q31_t *src, uint32_t m)
5341 {
5342 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5343 #ifdef __zcc__
5344  tpt_cfft_q31(src, m, false);
5345 #else
5346  riscv_dsp_cfft_q31(src, m);
5347 #endif
5348 #endif
5349 }
5350 
5363 static inline void hpm_dsp_cifft_q31(q31_t *src, uint32_t m)
5364 {
5365 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5366 #ifdef __zcc__
5367  tpt_cfft_q31(src, m, true);
5368 #else
5369  riscv_dsp_cifft_q31(src, m);
5370 #endif
5371 #endif
5372 }
5373 
5403 static inline int32_t hpm_dsp_rfft_f32(float32_t *src, uint32_t m)
5404 {
5405 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5406 #ifdef __zcc__
5407  return tpt_rfft_f32(src, src, m, false);
5408 #else
5409  return riscv_dsp_rfft_f32(src, m);
5410 #endif
5411 #endif
5412 }
5413 
5421 static inline int32_t hpm_dsp_rfft_f64(float64_t *src, uint32_t m)
5422 {
5423 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5424  return riscv_dsp_rfft_f64(src, m);
5425 #endif
5426 }
5427 
5435 static inline int32_t hpm_dsp_rifft_f32(float32_t *src, uint32_t m)
5436 {
5437 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5438  return riscv_dsp_rifft_f32(src, m);
5439 #endif
5440 }
5441 
5449 static inline int32_t hpm_dsp_rifft_f64(float64_t *src, uint32_t m)
5450 {
5451 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5452  return riscv_dsp_rifft_f64(src, m);
5453 #endif
5454 }
5455 
5469 static inline int32_t hpm_dsp_rfft_q15(q15_t *src, uint32_t m)
5470 {
5471 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5472  return riscv_dsp_rfft_q15(src, m);
5473 #endif
5474 }
5475 
5489 static inline int32_t hpm_dsp_rifft_q15(q15_t *src, uint32_t m)
5490 {
5491 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5492  return riscv_dsp_rifft_q15(src, m);
5493 #endif
5494 }
5495 
5509 static inline int32_t hpm_dsp_rfft_q31(q31_t *src, uint32_t m)
5510 {
5511 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5512  return riscv_dsp_rfft_q31(src, m);
5513 #endif
5514 }
5515 
5529 static inline int32_t hpm_dsp_rifft_q31(q31_t *src, uint32_t m)
5530 {
5531 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5532  return riscv_dsp_rifft_q31(src, m);
5533 #endif
5534 }
5535 
5554 static inline void hpm_dsp_dct_f32(float32_t *src, uint32_t m)
5555 {
5556 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5557  riscv_dsp_dct_f32(src, m);
5558 #endif
5559 }
5560 
5567 static inline void hpm_dsp_idct_f32(float32_t *src, uint32_t m)
5568 {
5569 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5570  riscv_dsp_idct_f32(src, m);
5571 #endif
5572 }
5573 
5586 static inline void hpm_dsp_dct_q15(q15_t *src, uint32_t m)
5587 {
5588 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5589  riscv_dsp_dct_q15(src, m);
5590 #endif
5591 }
5592 
5605 static inline void hpm_dsp_idct_q15(q15_t *src, uint32_t m)
5606 {
5607 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5608  riscv_dsp_idct_q15(src, m);
5609 #endif
5610 }
5611 
5624 static inline void hpm_dsp_dct_q31(q31_t *src, uint32_t m)
5625 {
5626 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5627  riscv_dsp_dct_q31(src, m);
5628 #endif
5629 }
5630 
5643 static inline void hpm_dsp_idct_q31(q31_t *src, uint32_t m)
5644 {
5645 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5646  riscv_dsp_idct_q31(src, m);
5647 #endif
5648 }
5649 
5668 static inline void hpm_dsp_dct4_f32(float32_t *src, uint32_t m)
5669 {
5670 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5671  riscv_dsp_dct4_f32(src, m);
5672 #endif
5673 }
5674 
5681 static inline void hpm_dsp_idct4_f32(float32_t *src, uint32_t m)
5682 {
5683 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5684  riscv_dsp_idct4_f32(src, m);
5685 #endif
5686 }
5687 
5700 static inline void hpm_dsp_dct4_q15(q15_t *src, uint32_t m)
5701 {
5702 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5703  riscv_dsp_dct4_q15(src, m);
5704 #endif
5705 }
5706 
5719 static inline void hpm_dsp_idct4_q15(q15_t *src, uint32_t m)
5720 {
5721 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5722  riscv_dsp_idct4_q15(src, m);
5723 #endif
5724 }
5725 
5738 static inline void hpm_dsp_dct4_q31(q31_t *src, uint32_t m)
5739 {
5740 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5741  riscv_dsp_dct4_q31(src, m);
5742 #endif
5743 }
5744 
5757 static inline void hpm_dsp_idct4_q31(q31_t *src, uint32_t m)
5758 {
5759 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5760  riscv_dsp_idct4_q31(src, m);
5761 #endif
5762 }
5763 
/*
 * Prototype only: the definition is not in this part of the header.
 * Unlike the wrappers above it is neither static nor inline, so it has
 * external linkage. Takes a float buffer (src) and a uint32_t (m).
 * NOTE(review): presumably a software complex FFT over src sized by m —
 * confirm the buffer layout and the meaning of m against the definition.
 */
5775 void hpm_software_cfft_float(float *src, uint32_t m);
5776 
5777 #endif
5778 
5779 #if defined(HPMSOC_HAS_HPMSDK_FFA) && defined(HPM_EN_MATH_DSP_LIB)
5780 
5781 #include "hpm_ffa_drv.h"
5782 #include "hpm_soc.h"
/*
 * Fills a zero-initialised fft_xfer_t for the caller's q15_t buffer:
 * num_points = 1 << m, src and dst both set to the same buffer, and
 * is_ifft = false.
 * NOTE(review): the embedded listing numbers jump from 5801 to 5805, so this
 * view appears to omit statements (nothing visible here consumes xfer) —
 * check the original header before relying on this body.
 */
5795 static inline void hpm_ffa_cfft_q15(q15_t *src, uint32_t m)
5796 {
5797  fft_xfer_t xfer = { 0 };
5798  xfer.num_points = 1 << m; /* shift is done in int */
5799  xfer.src = src;
5800  xfer.dst = src; /* same buffer as the source */
5801  xfer.is_ifft = false;
5805 }
/*
 * Fills a zero-initialised fft_xfer_t for the caller's q31_t buffer:
 * num_points = 1 << m, src and dst both set to the same buffer, and
 * is_ifft = false.
 * NOTE(review): the embedded listing numbers jump from 5820 to 5824, so this
 * view appears to omit statements (nothing visible here consumes xfer) —
 * check the original header before relying on this body.
 */
5814 static inline void hpm_ffa_cfft_q31(q31_t *src, uint32_t m)
5815 {
5816  fft_xfer_t xfer = { 0 };
5817  xfer.num_points = 1 << m; /* shift is done in int */
5818  xfer.src = src;
5819  xfer.dst = src; /* same buffer as the source */
5820  xfer.is_ifft = false;
5824 }
5825 
5826 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
/*
 * Fills a zero-initialised fft_xfer_t for the caller's float buffer
 * (num_points = 1 << m, src == dst, is_ifft = false, both data types set to
 * FFA_DATA_TYPE_COMPLEX_FP32) and then configures HPM_FFA: enables the FP
 * bias, sets the coefficient max index to 0, the output max index to 20 and
 * the input max index to 20 - m.
 * Only compiled when HPM_IP_FEATURE_FFA_FP32 is defined and non-zero.
 * NOTE(review): the embedded listing numbers jump from 5839 to 5841, so this
 * view appears to omit a statement (nothing visible here consumes xfer) —
 * check the original header before relying on this body.
 */
5827 static inline void hpm_ffa_cfft_f32(float *src, uint32_t m)
5828 {
5829  fft_xfer_t xfer = { 0 };
5830  xfer.num_points = 1 << m; /* shift is done in int */
5831  xfer.src = src;
5832  xfer.dst = src; /* same buffer as the source */
5833  xfer.is_ifft = false;
5834  xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5835  xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5836  ffa_enable_fp_bias(HPM_FFA);
5837  ffa_set_coef_max_index(HPM_FFA, 0);
5838  ffa_set_output_max_index(HPM_FFA, 20);
5839  ffa_set_input_max_index(HPM_FFA, 20 - m); /* input index depends on m */
5841 }
5842 #endif
/*
 * Fills a zero-initialised fft_xfer_t for the caller's q15_t buffer:
 * num_points = 1 << m, src and dst both set to the same buffer, and
 * is_ifft = true.
 * NOTE(review): the embedded listing numbers jump from 5857 to 5861, so this
 * view appears to omit statements (nothing visible here consumes xfer) —
 * check the original header before relying on this body.
 */
5851 static inline void hpm_ffa_cifft_q15(q15_t *src, uint32_t m)
5852 {
5853  fft_xfer_t xfer = { 0 };
5854  xfer.num_points = 1 << m; /* shift is done in int */
5855  xfer.src = src;
5856  xfer.dst = src; /* same buffer as the source */
5857  xfer.is_ifft = true;
5861 }
5862 
/*
 * Fills a zero-initialised fft_xfer_t for the caller's q31_t buffer:
 * num_points = 1 << m, src and dst both set to the same buffer, and
 * is_ifft = true.
 * NOTE(review): the embedded listing numbers jump from 5877 to 5881, so this
 * view appears to omit statements (nothing visible here consumes xfer) —
 * check the original header before relying on this body.
 */
5871 static inline void hpm_ffa_cifft_q31(q31_t *src, uint32_t m)
5872 {
5873  fft_xfer_t xfer = { 0 };
5874  xfer.num_points = 1 << m; /* shift is done in int */
5875  xfer.src = src;
5876  xfer.dst = src; /* same buffer as the source */
5877  xfer.is_ifft = true;
5881 }
5882 
5883 #if defined(HPM_IP_FEATURE_FFA_FP32) && HPM_IP_FEATURE_FFA_FP32
/*
 * Fills a zero-initialised fft_xfer_t for the caller's float buffer
 * (num_points = 1 << m, src == dst, is_ifft = true, both data types set to
 * FFA_DATA_TYPE_COMPLEX_FP32) and then configures HPM_FFA: enables the FP
 * bias, sets the coefficient max index to 0x0, the output max index to 10
 * and the input max index to 20.
 * Only compiled when HPM_IP_FEATURE_FFA_FP32 is defined and non-zero.
 * NOTE(review): the index values differ from hpm_ffa_cfft_f32 (output 20,
 * input 20 - m) — confirm the asymmetry is intentional.
 * NOTE(review): the embedded listing numbers jump from 5896 to 5898, so this
 * view appears to omit a statement (nothing visible here consumes xfer) —
 * check the original header before relying on this body.
 */
5884 static inline void hpm_ffa_cifft_f32(float *src, uint32_t m)
5885 {
5886  fft_xfer_t xfer = { 0 };
5887  xfer.num_points = 1 << m; /* shift is done in int */
5888  xfer.src = src;
5889  xfer.dst = src; /* same buffer as the source */
5890  xfer.is_ifft = true;
5891  xfer.src_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5892  xfer.dst_data_type = FFA_DATA_TYPE_COMPLEX_FP32;
5893  ffa_enable_fp_bias(HPM_FFA);
5894  ffa_set_coef_max_index(HPM_FFA, 0x0);
5895  ffa_set_output_max_index(HPM_FFA, 10);
5896  ffa_set_input_max_index(HPM_FFA, 20);
5898 }
5899 #endif
5900 
5901 #endif
5902 
5903 #endif
5904 
5910 #ifdef HPM_MATH_DSP_UTILS
5911 
5920 #ifdef HPM_EN_MATH_DSP_LIB
5921 #ifdef __zcc__
5922 #include <tpt_math.h>
5923 #endif
5924 #include "riscv_dsp_utils_math.h"
5925 // Cosine and Sine
5926 static inline float32_t hpm_dsp_cos_f32(float32_t src)
5927 {
5928 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5929  return riscv_dsp_cos_f32(src);
5930 #endif
5931 }
5932 static inline q31_t hpm_dsp_cos_q31(q31_t src)
5933 {
5934 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5935  return riscv_dsp_cos_q31(src);
5936 #endif
5937 }
5938 static inline q15_t hpm_dsp_cos_q15(q15_t src)
5939 {
5940 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5941  return riscv_dsp_cos_q15(src);
5942 #endif
5943 }
5944 
5945 static inline float32_t hpm_dsp_sin_f32(float32_t src)
5946 {
5947 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5948  return riscv_dsp_sin_f32(src);
5949 #endif
5950 }
5951 
5952 #if defined (__riscv_zfh)
5957 static inline float16_t hpm_dsp_sin_f16(float16_t src)
5958 {
5959 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5960  return riscv_dsp_sin_f16(src);
5961 #endif
5962 }
5963 #endif
5964 
5965 static inline q31_t hpm_dsp_sin_q31(q31_t src)
5966 {
5967 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5968  return riscv_dsp_sin_q31(src);
5969 #endif
5970 }
5971 static inline q15_t hpm_dsp_sin_q15(q15_t src)
5972 {
5973 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5974  return riscv_dsp_sin_q15(src);
5975 #endif
5976 }
5977 
5978 // Arc tangent
5979 static inline float32_t hpm_dsp_atan_f32(float32_t src)
5980 {
5981 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5982  return riscv_dsp_atan_f32(src);
5983 #endif
5984 }
5985 static inline q31_t hpm_dsp_atan_q31(q31_t src)
5986 {
5987 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5988  return riscv_dsp_atan_q31(src);
5989 #endif
5990 }
5991 static inline q15_t hpm_dsp_atan_q15(q15_t src)
5992 {
5993 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
5994  return riscv_dsp_atan_q15(src);
5995 #endif
5996 }
5997 static inline float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
5998 {
5999 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6000  return riscv_dsp_atan2_f32(srcy, src2);
6001 #endif
6002 }
6003 static inline q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
6004 {
6005 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6006  return riscv_dsp_atan2_q15(srcy, src2);
6007 #endif
6008 }
6009 static inline q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
6010 {
6011 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6012  return riscv_dsp_atan2_q31(srcy, src2);
6013 #endif
6014 }
6015 
6016 // Square Root
6022 static inline float32_t hpm_dsp_sqrt_f32(float32_t src)
6023 {
6024 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6025  return riscv_dsp_sqrt_f32(src);
6026 #endif
6027 }
6028 
6034 static inline q31_t hpm_dsp_sqrt_q31(q31_t src)
6035 {
6036 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6037  return riscv_dsp_sqrt_q31(src);
6038 #endif
6039 }
6040 
6046 static inline q15_t hpm_dsp_sqrt_q15(q15_t src)
6047 {
6048 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6049  return riscv_dsp_sqrt_q15(src);
6050 #endif
6051 }
6052 
6053 // Convert function
6060 static inline void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
6061 {
6062 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6063  riscv_dsp_convert_f32_q15(src, dst, size);
6064 #endif
6065 }
6066 
6073 static inline void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
6074 {
6075 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6076 #ifdef __zcc__
6077  tpt_f32_to_q31(dst, src, size);
6078 #else
6079  riscv_dsp_convert_f32_q31(src, dst, size);
6080 #endif
6081 #endif
6082 }
6083 
6090 static inline void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
6091 {
6092 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6093  riscv_dsp_convert_f32_q7(src, dst, size);
6094 #endif
6095 }
6096 
6103 static inline void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
6104 {
6105 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6106  riscv_dsp_convert_q15_f32(src, dst, size);
6107 #endif
6108 }
6109 
6116 static inline void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
6117 {
6118 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6119  riscv_dsp_convert_q15_q31(src, dst, size);
6120 #endif
6121 }
6122 
6129 static inline void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
6130 {
6131 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6132  riscv_dsp_convert_q15_q7(src, dst, size);
6133 #endif
6134 }
6135 
6142 static inline void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
6143 {
6144 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6145 #ifdef __zcc__
6146  tpt_q31_to_f32(dst, src, size);
6147 #else
6148  riscv_dsp_convert_q31_f32(src, dst, size);
6149 #endif
6150 #endif
6151 }
6152 
6159 static inline void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
6160 {
6161 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6162  riscv_dsp_convert_q31_q15(src, dst, size);
6163 #endif
6164 }
6165 
6172 static inline void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
6173 {
6174 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6175  riscv_dsp_convert_q31_q7(src, dst, size);
6176 #endif
6177 }
6178 
6185 static inline void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
6186 {
6187 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6188  riscv_dsp_convert_q7_f32(src, dst, size);
6189 #endif
6190 }
6191 
6198 static inline void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
6199 {
6200 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6201  riscv_dsp_convert_q7_q15(src, dst, size);
6202 #endif
6203 }
6204 
6211 static inline void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
6212 {
6213 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6214  riscv_dsp_convert_q7_q31(src, dst, size);
6215 #endif
6216 }
6217 
6218 // Duplicate function
6225 static inline void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
6226 {
6227 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6228  riscv_dsp_dup_f32(src, dst, size);
6229 #endif
6230 }
6231 
6238 static inline void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
6239 {
6240 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6241  riscv_dsp_dup_q15(src, dst, size);
6242 #endif
6243 }
6244 
6251 static inline void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
6252 {
6253 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6254  riscv_dsp_dup_q31(src, dst, size);
6255 #endif
6256 }
6257 
6264 static inline void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
6265 {
6266 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6267  riscv_dsp_dup_q7(src, dst, size);
6268 #endif
6269 }
6270 
6271 // Set function
6278 static inline void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
6279 {
6280 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6281  riscv_dsp_set_f32(val, dst, size);
6282 #endif
6283 }
6284 
6291 static inline void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
6292 {
6293 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6294  riscv_dsp_set_q15(val, dst, size);
6295 #endif
6296 }
6297 
6304 static inline void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
6305 {
6306 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6307  riscv_dsp_set_q31(val, dst, size);
6308 #endif
6309 }
6310 
6317 static inline void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
6318 {
6319 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6320  riscv_dsp_set_q7(val, dst, size);
6321 #endif
6322 }
6323 
6332 static inline float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
6333 {
6334 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6335  return riscv_dsp_weighted_sum_f32(src, weight, size);
6336 #endif
6337 }
6338 
6348 static inline void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
6349 {
6350 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6351  riscv_dsp_barycenter_f32(src, weights, out, numofvec, dimofvec);
6352 #endif
6353 }
6354 
6360 static inline float32_t hpm_dsp_exp_f32(float32_t src)
6361 {
6362 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6363  return riscv_dsp_exp_f32(src);
6364 #endif
6365 }
6366 
6367 #if defined (__riscv_zfh)
6373 static inline float16_t hpm_dsp_exp_f16(float16_t src)
6374 {
6375 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6376  return riscv_dsp_exp_f16(src);
6377 #endif
6378 }
6379 #endif
6380 
6386 static inline float32_t hpm_dsp_sigmoid_f32(float32_t src)
6387 {
6388 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6389  return riscv_dsp_sigmoid_f32(src);
6390 #endif
6391 }
6392 
6393 #if defined (__riscv_zfh)
6399 static inline float16_t hpm_dsp_sigmoid_f16(float16_t src)
6400 {
6401 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6402  return riscv_dsp_sigmoid_f16(src);
6403 #endif
6404 }
6405 #endif
6406 
6412 static inline float32_t hpm_dsp_log_f32(float32_t src)
6413 {
6414 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6415  return riscv_dsp_log_f32(src);
6416 #endif
6417 }
6418 
6419 #if defined (__riscv_zfh)
6425 static inline float16_t hpm_dsp_log_f16(float16_t src)
6426 {
6427 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6428  return riscv_dsp_log_f16(src);
6429 #endif
6430 }
6431 #endif
6432 
6439 #endif
6440 #endif
6441 
6442 #ifdef HPM_MATH_DSP_SORT
6443 
6454 #ifdef HPM_EN_MATH_DSP_LIB
6455 #include "riscv_dsp_sort_math.h"
6478 static inline void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t * instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
6479 {
6480 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6481  riscv_dsp_sort_init_f32(instance, alg, order);
6482 #endif
6483 }
6484 
6525 static inline void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t * instance,float32_t * src, float32_t * dst, uint32_t size)
6526 {
6527 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6528  riscv_dsp_sort_f32(instance, src, dst, size);
6529 #endif
6530 }
6531 
6547 static inline void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t * instance, riscv_dsp_sort_order order, float32_t * buf)
6548 {
6549 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6550  riscv_dsp_sort_merge_init_f32(instance, order, buf);
6551 #endif
6552 }
6553 
6587 static inline void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t * instance, float32_t * src, float32_t * dst, uint32_t size)
6588 {
6589 #if HPM_DSP_CORE == HPM_DSP_HW_NDS32
6590  riscv_dsp_sort_merge_f32(instance, src, dst, size);
6591 #endif
6592 }
6593 
6594 #endif
6595 #endif
6596 
6597 #ifdef HPM_MATH_NN_TINYENGINE
6598 #ifdef HPM_EN_MATH_DSP_LIB
6599 
6600 #include "riscv_math_types.h"
6601 #include <string.h>
6602 #include "riscv_simd_convert.h"
6603 
/*
 * Split a signed shift amount into its two directions:
 *   LEFT_SHIFT(s)  yields s when s is positive, otherwise 0
 *   RIGHT_SHIFT(s) yields -s when s is zero or negative, otherwise 0
 *
 * The argument is parenthesised so that compound expressions such as
 * RIGHT_SHIFT(a - b) expand correctly. It is still evaluated more than
 * once, so do not pass expressions with side effects.
 */
#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))

/* Largest and smallest bit patterns of a q31_t value. */
#define Q31_MAX ((q31_t)(0x7FFFFFFFL))
#define Q31_MIN ((q31_t)(0x80000000L))
6608 
6609 static inline void write_q15x2_ia(
6610  q15_t **pQ15,
6611  q31_t value)
6612 {
6613  q31_t val = value;
6614  (*pQ15)[0] = (val & 0x0FFFF);
6615  (*pQ15)[1] = (val >> 16) & 0x0FFFF;
6616  *pQ15 += 2;
6617 }
6618 
6625 __STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
6626 {
6627  q31_t val;
6628 
6629  val = *(q31_t *)(*in_q15);
6630  *in_q15 += 2;
6631 
6632  return val;
6633 }
6634 
6643 __STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
6644 {
6645  q31_t result = 0;
6646  q63_t mult = 1 << 30;
6647 
6648  if ((m1 < 0) ^ (m2 < 0)) {
6649  mult = 1 - mult;
6650  }
6651  mult = mult + (q63_t)m1 * m2;
6652  result = mult / (1UL << 31);
6653 
6654  if ((m1 == m2) && (m1 == (int32_t)Q31_MIN)) {
6655  result = Q31_MAX;
6656  }
6657  return result;
6658 }
6659 
6668 __STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
6669 {
6670  q31_t result = 0;
6671 
6672  const q31_t remainder_mask = (1l << exponent) - 1;
6673  int32_t remainder = remainder_mask & dividend;
6674 
6675  result = dividend >> exponent;
6676  q31_t threshold = remainder_mask >> 1;
6677  if (result < 0) {
6678  threshold++;
6679  }
6680  if (remainder > threshold) {
6681  result++;
6682  }
6683 
6684  return result;
6685 }
6686 
6687 __STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
6688 {
6689  return hpm_nn_divide_by_power_of_two(hpm_nn_sat_doubling_high_mult(val * (1 << LEFT_SHIFT(shift)), multiplier),
6690  RIGHT_SHIFT(shift));
6691 }
6692 
6698 __STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
6699 {
6700  q31_t val;
6701 
6702  val = *(q31_t *)(*in_q7);
6703  *in_q7 += 4;
6704 
6705  return val;
6706 }
6707 
6712 __STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
6713 {
6714  q31_t inA = hpm_nn_read_q7x4_ia(&source);
6715 
6716  *out2 = __SXTB16_ROR(inA, 8);
6717  *out1 = __SXTB16(inA);
6718 
6719  return source;
6720 }
6721 
6726 __STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
6727 {
6728  q31_t inA = hpm_nn_read_q7x4_ia(&source);
6729  q31_t inAbuf1 = __SXTB16_ROR(inA, 8);
6730  q31_t inAbuf2 = __SXTB16(inA);
6731 
6732  *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
6733  *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
6734 
6735  return source;
6736 }
6737 
6743 __STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
6744 {
6745  int32_t val;
6746 
6747  val = *(int32_t *)(*in_s8);
6748  *in_s8 += 4;
6749 
6750  return val;
6751 }
6752 
6753 __STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
6754 {
6755  int32_t block_cnt;
6756 
6757  /* Run the below code for cores that support SIMD instructions */
6758  int32_t in_q7x4;
6759  int32_t in_q15x2_1;
6760  int32_t in_q15x2_2;
6761  int32_t out_q15x2_1;
6762  int32_t out_q15x2_2;
6763 
6764  /*loop unrolling */
6765  block_cnt = block_size >> 2;
6766 
6767  /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */
6768  const int32_t offset_q15x2 = __PKHBT(offset, offset, 16);
6769  while (block_cnt > 0) {
6770  /* convert from s8 to s16 and then store the results in the destination buffer */
6771  in_q7x4 = hpm_nn_read_s8x4_ia(&src);
6772 
6773  /* Extract and sign extend each of the four s8 values to s16 */
6774  in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8));
6775  in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4);
6776 
6777  out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16);
6778  out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16);
6779 
6780  write_q15x2_ia(&dst, out_q15x2_1);
6781  write_q15x2_ia(&dst, out_q15x2_2);
6782 
6783  block_cnt--;
6784  }
6785  /* Handle left over samples */
6786  block_cnt = block_size % 0x4;
6787 
6788  while (block_cnt > 0) {
6789  *dst++ = (int16_t)*src++ + offset;
6790 
6791  /* Decrement the loop counter */
6792  block_cnt--;
6793  }
6794 }
6795 
6796 #endif
6797 #endif
6798 
6799 #ifdef HPM_MATH_NN_ACTIVATION
6800 #ifdef HPM_EN_MATH_NN_LIB
6801 #if defined(__zcc__)
6802 #include "tpt_nn_activation.h"
6803 #else
6804 #include "riscv_nn_activation.h"
6805 #endif
6837 static inline void hpm_nn_activate_s8(q7_t *in_out,
6838  uint32_t size,
6839  uint16_t int_bits,
6840  riscv_nn_activation_fun act_fun)
6841 {
6842 #if defined(__zcc__)
6843  tpt_nn_activate_s8(in_out, size, int_bits, act_fun);
6844 #else
6845  riscv_nn_activate_s8(in_out, size, int_bits, act_fun);
6846 #endif
6847 }
6848 
6864 static inline void hpm_nn_activate_s16(q15_t *in_out,
6865  uint32_t size,
6866  uint16_t int_bits,
6867  riscv_nn_activation_fun act_fun)
6868 {
6869 #if defined(__zcc__)
6870  tpt_nn_activate_s16(in_out, size, int_bits, act_fun);
6871 #else
6872  riscv_nn_activate_s16(in_out, size, int_bits, act_fun);
6873 #endif
6874 }
6875 
6893 static inline void hpm_nn_leaky_relu_s8(q7_t *in_out,
6894  uint32_t size,
6895  q15_t slope)
6896 #if defined(__zcc__)
6897  tpt_nn_leaky_relu_q7(in_out, in_out, size, slope);
6898 #else
6899  riscv_nn_leaky_relu_s8(in_out, size, slope);
6900 #endif
6901 }
6902 
6910 static inline void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
6911 {
6912 #if defined(__zcc__)
6913  tpt_nn_relu_any_q7(data, size, max_val);
6914 #else
6915  riscv_nn_relu_any_s8(data, size, max_val);
6916 #endif
6917 }
6918 
6935 static inline void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
6936 {
6937 #if defined(__zcc__)
6938  tpt_nn_relu_q7(in_out, size);
6939 #else
6940  riscv_nn_relu_s8(in_out, size);
6941 #endif
6942 }
6943 
6950 static inline void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
6951 {
6952 #if defined(__zcc__)
6953  tpt_nn_relu_q15(in_out, size);
6954 #else
6955  riscv_nn_relu_s16(in_out, size);
6956 #endif
6957 }
6958 
6959 #ifdef __riscv_zfh
6969 static inline int32_t hpm_nn_sigmoid_f16(const float16_t *in_vec,
6970  uint32_t size,
6971  float16_t *out_vec)
6972 {
6973 #if defined(__zcc__)
6974  return tpt_nn_sigmoid_f16(in_vec, size, out_vec);
6975 #else
6976  return riscv_nn_sigmoid_f16(in_vec, size, out_vec);
6977 #endif
6978 }
6979 
6988 static inline int32_t hpm_nn_tanh_f16(const float16_t *in_vec,
6989  uint32_t size,
6990  float16_t *out_vec)
6991 {
6992 #if defined(__zcc__)
6993  return tpt_nn_tanh_f16(in_vec, size, out_vec);
6994 #else
6995  return riscv_nn_tanh_f16(in_vec, size, out_vec);
6996 #endif
6997 }
6998 #endif
6999 
7003 #endif
7004 #endif
7005 
7006 #ifdef HPM_MATH_NN_BASIC
7007 #ifdef HPM_EN_MATH_NN_LIB
7008 #if defined(__zcc__)
7009 #include "tpt_nn_basic.h"
7010 #else
7011 #include "riscv_nn_basic.h"
7012 #endif
7058 static inline void hpm_nn_add_s8_sym(const q7_t *in_tensor1,
7059  const q7_t *in_tensor2,
7060  const int16_t *scale1,
7061  const int16_t *scale2,
7062  const uint32_t size,
7063  const uint16_t pre_rshift,
7064  const uint16_t out_scale,
7065  const uint16_t post_rshift,
7066  q7_t *out)
7067 {
7068 #if defined(__zcc__)
7069  tpt_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, size, pre_rshift,
7070  out_scale, post_rshift, out);
7071 #else
7072  riscv_nn_add_s8_sym(in_tensor1, in_tensor2, scale1, scale2, size, pre_rshift,
7073  out_scale, post_rshift, out);
7074 #endif
7075 }
7076 
7096 static inline void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1,
7097  const q7_t *in_tensor2,
7098  const uint32_t scale1,
7099  const uint32_t scale2,
7100  const uint32_t size,
7101  const uint16_t pre_rshift,
7102  const uint16_t out_scale,
7103  const uint16_t post_rshift,
7104  q7_t *out)
7105 {
7106 #if defined(__zcc__)
7107  tpt_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2, size,
7108  pre_rshift, out_scale, post_rshift, out);
7109 #else
7110  riscv_nn_add_s8_sym_round(in_tensor1, in_tensor2, scale1, scale2, size,
7111  pre_rshift, out_scale, post_rshift, out);
7112 #endif
7113 }
7114 
/**
 * @brief Backend dispatcher for the asymmetric s8 element-wise addition
 *        kernel.
 *
 * Every argument is passed on unchanged and in declaration order to
 * tpt_nn_ew_add_s8_asym() when __zcc__ is defined, otherwise to
 * riscv_nn_ew_add_s8_asym(). Parameter semantics are those of the backend.
 *
 * @return the value returned by the backend call
 */
static inline int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1,
                                        const int8_t *in_tensor2,
                                        const int32_t in_offset1,
                                        const int32_t in_scale1,
                                        const int32_t in_rshift1,
                                        const int32_t in_offset2,
                                        const int32_t in_scale2,
                                        const int32_t in_rshift2,
                                        const int32_t lshift,
                                        int8_t *out,
                                        const int32_t out_offset,
                                        const int32_t out_scale,
                                        const int32_t out_rshift,
                                        const int32_t act_min,
                                        const int32_t act_max,
                                        const uint32_t size)
{
#ifndef __zcc__
    return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2,
                                   in_offset1, in_scale1, in_rshift1,
                                   in_offset2, in_scale2, in_rshift2,
                                   lshift,
                                   out, out_offset, out_scale, out_rshift,
                                   act_min, act_max, size);
#else
    return tpt_nn_ew_add_s8_asym(in_tensor1, in_tensor2,
                                 in_offset1, in_scale1, in_rshift1,
                                 in_offset2, in_scale2, in_rshift2,
                                 lshift,
                                 out, out_offset, out_scale, out_rshift,
                                 act_min, act_max, size);
#endif /* __zcc__ */
}
7195 
/**
 * @brief Backend dispatcher for the asymmetric s8 element-wise
 *        multiplication kernel.
 *
 * Every argument is passed on unchanged and in declaration order to
 * tpt_nn_ew_mul_s8_asym() when __zcc__ is defined, otherwise to
 * riscv_nn_ew_mul_s8_asym(). Parameter semantics are those of the backend.
 *
 * @return the value returned by the backend call
 */
static inline int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1,
                                        const int8_t *in_tensor2,
                                        const int32_t in_offset1,
                                        const int32_t in_offset2,
                                        int8_t *out,
                                        const int32_t out_offset,
                                        const int32_t out_scale,
                                        const int32_t out_shift,
                                        const int32_t act_min,
                                        const int32_t act_max,
                                        const uint32_t size)
{
#ifndef __zcc__
    return riscv_nn_ew_mul_s8_asym(in_tensor1, in_tensor2,
                                   in_offset1, in_offset2,
                                   out, out_offset, out_scale, out_shift,
                                   act_min, act_max, size);
#else
    return tpt_nn_ew_mul_s8_asym(in_tensor1, in_tensor2,
                                 in_offset1, in_offset2,
                                 out, out_offset, out_scale, out_shift,
                                 act_min, act_max, size);
#endif /* __zcc__ */
}
7257 
7262 #endif
7263 
7264 #ifdef HPM_EN_MATH_NN_RVP32_LIB
7265 #if defined(__zcc__)
7266 #include "tpt_nn_basic.h"
7267 #else
7268 #include "riscv_nn_basic.h"
7269 #endif
7270 
/**
 * @brief Backend dispatcher for the asymmetric s8 element-wise addition
 *        kernel (variant compiled under HPM_EN_MATH_NN_RVP32_LIB).
 *
 * Without __zcc__ every argument is passed on unchanged and in declaration
 * order to riscv_nn_ew_add_s8_asym(). With __zcc__ the request goes to
 * tpt_elementwise_add_s8(), which takes the output-related arguments first
 * and receives out_rshift negated.
 *
 * @return the value returned by the backend call
 */
static inline int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1,
                                        const int8_t *in_tensor2,
                                        const int32_t in_offset1,
                                        const int32_t in_scale1,
                                        const int32_t in_rshift1,
                                        const int32_t in_offset2,
                                        const int32_t in_scale2,
                                        const int32_t in_rshift2,
                                        const int32_t lshift,
                                        int8_t *out,
                                        const int32_t out_offset,
                                        const int32_t out_scale,
                                        const int32_t out_rshift,
                                        const int32_t act_min,
                                        const int32_t act_max,
                                        const uint32_t size)
{
#ifndef __zcc__
    return riscv_nn_ew_add_s8_asym(in_tensor1, in_tensor2,
                                   in_offset1, in_scale1, in_rshift1,
                                   in_offset2, in_scale2, in_rshift2,
                                   lshift,
                                   out, out_offset, out_scale, out_rshift,
                                   act_min, act_max, size);
#else
    return tpt_elementwise_add_s8(out, out_offset, out_scale, -out_rshift,
                                  act_min, act_max,
                                  in_tensor1, in_tensor2,
                                  in_offset1, in_scale1, in_rshift1,
                                  in_offset2, in_scale2, in_rshift2,
                                  lshift, size);
#endif /* __zcc__ */
}
7352 
7353 #endif
7354 
7355 #endif
7356 
7357 #ifdef HPM_MATH_NN_CONCATENATION
7358 #ifdef HPM_EN_MATH_NN_LIB
7359 #if defined(__zcc__)
7360 #include "tpt_nn_concatenation.h"
7361 #else
7362 #include "riscv_nn_concatenation.h"
7363 #endif
7364 
/**
 * @brief Backend dispatcher for the s8 concatenation kernel along w.
 *
 * Without __zcc__ every argument is passed on unchanged and in declaration
 * order to riscv_nn_concate_s8_w(). With __zcc__ the request goes to
 * tpt_concatenation_s8_w(), which takes out_tensor as its first argument.
 * Parameter semantics are those of the backend.
 */
static inline void hpm_nn_concate_s8_w(const int8_t *in_tensor,
                                       const uint16_t in_tensor_x,
                                       const uint16_t in_tensor_y,
                                       const uint16_t in_tensor_z,
                                       const uint16_t in_tensor_w,
                                       int8_t *out_tensor,
                                       const uint32_t out_offset_w)
{
#ifndef __zcc__
    riscv_nn_concate_s8_w(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_offset_w);
#else
    tpt_concatenation_s8_w(out_tensor, in_tensor,
                           in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                           out_offset_w);
#endif /* __zcc__ */
}
7406 
/**
 * @brief Backend dispatcher for the s8 concatenation kernel along x.
 *
 * Every argument is passed on unchanged and in declaration order to
 * tpt_nn_concate_s8_x() when __zcc__ is defined, otherwise to
 * riscv_nn_concate_s8_x(). Parameter semantics are those of the backend.
 */
static inline void hpm_nn_concate_s8_x(const int8_t *in_tensor,
                                       const uint16_t in_tensor_x,
                                       const uint16_t in_tensor_y,
                                       const uint16_t in_tensor_z,
                                       const uint16_t in_tensor_w,
                                       int8_t *out_tensor,
                                       const uint16_t out_tensor_x,
                                       const uint32_t out_offset_x)
{
#ifndef __zcc__
    riscv_nn_concate_s8_x(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_tensor_x, out_offset_x);
#else
    tpt_nn_concate_s8_x(in_tensor,
                        in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                        out_tensor, out_tensor_x, out_offset_x);
#endif /* __zcc__ */
}
7442 
/**
 * @brief Backend dispatcher for the s8 concatenation kernel along y.
 *
 * Every argument is passed on unchanged and in declaration order to
 * tpt_nn_concate_s8_y() when __zcc__ is defined, otherwise to
 * riscv_nn_concate_s8_y(). Parameter semantics are those of the backend.
 */
static inline void hpm_nn_concate_s8_y(const int8_t *in_tensor,
                                       const uint16_t in_tensor_x,
                                       const uint16_t in_tensor_y,
                                       const uint16_t in_tensor_z,
                                       const uint16_t in_tensor_w,
                                       int8_t *out_tensor,
                                       const uint16_t out_tensor_y,
                                       const uint32_t out_offset_y)
{
#ifndef __zcc__
    riscv_nn_concate_s8_y(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_tensor_y, out_offset_y);
#else
    tpt_nn_concate_s8_y(in_tensor,
                        in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                        out_tensor, out_tensor_y, out_offset_y);
#endif /* __zcc__ */
}
7477 
/**
 * @brief Backend dispatcher for the s8 concatenation kernel along z.
 *
 * Every argument is passed on unchanged and in declaration order to
 * tpt_nn_concate_s8_z() when __zcc__ is defined, otherwise to
 * riscv_nn_concate_s8_z(). Parameter semantics are those of the backend.
 */
static inline void hpm_nn_concate_s8_z(const int8_t *in_tensor,
                                       const uint16_t in_tensor_x,
                                       const uint16_t in_tensor_y,
                                       const uint16_t in_tensor_z,
                                       const uint16_t in_tensor_w,
                                       int8_t *out_tensor,
                                       const uint16_t out_tensor_z,
                                       const uint32_t out_offset_z)
{
#ifndef __zcc__
    riscv_nn_concate_s8_z(in_tensor,
                          in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                          out_tensor, out_tensor_z, out_offset_z);
#else
    tpt_nn_concate_s8_z(in_tensor,
                        in_tensor_x, in_tensor_y, in_tensor_z, in_tensor_w,
                        out_tensor, out_tensor_z, out_offset_z);
#endif /* __zcc__ */
}
7512 
7517 #endif
7518 #endif
7519 
7520 #ifdef HPM_MATH_NN_CONVOLUTION
7521 #ifdef HPM_EN_MATH_NN_LIB
7522 #if defined(__zcc__)
7523 #include "tpt_nn_convolution.h"
7524 #else
7525 #include "riscv_nn_convolution.h"
7526 #endif
7527 
7615 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor,
7616  const uint16_t in_tensor_dim_x,
7617  const uint16_t in_tensor_dim_y,
7618  const uint16_t in_tensor_ch,
7619  const q7_t *ker_weight,
7620  const uint16_t out_tensor_ch,
7621  const uint16_t ker_dim_x,
7622  const uint16_t ker_dim_y,
7623  const uint16_t pad_x,
7624  const uint16_t pad_y,
7625  const uint16_t stride_x,
7626  const uint16_t stride_y,
7627  const q7_t *bias,
7628  const uint16_t bias_lshift,
7629  const uint16_t out_rshift,
7630  q7_t *out_tensor,
7631  const uint16_t out_tensor_dim_x,
7632  const uint16_t out_tensor_dim_y,
7633  q15_t *in_tmp_buf,
7634  q7_t *tmp_buf)
7635 {
7636 #if defined(__zcc__)
7637  return tpt_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7638  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7639  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7640  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
7641  out_tensor_dim_y, in_tmp_buf, tmp_buf);
7642 #else
7643  return riscv_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(
7644  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7645  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7646  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
7647  out_tensor_dim_y, in_tmp_buf, tmp_buf);
7648 #endif
7649 }
7650 
7702 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor,
7703  const uint16_t in_tensor_dim,
7704  const q7_t *ker_weight,
7705  const uint16_t out_tensor_ch,
7706  const uint16_t ker_dim,
7707  const uint16_t pad,
7708  const uint16_t stride,
7709  const q7_t *bias,
7710  const uint16_t bias_lshift,
7711  const uint16_t out_rshift,
7712  q7_t *out_tensor,
7713  const uint16_t out_tensor_dim,
7714  q15_t *in_tmp_buf,
7715  q7_t *tmp_buf)
7716 {
7717 #if defined(__zcc__)
7718  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7719  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7720  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7721  tmp_buf);
7722 #else
7723  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(
7724  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7725  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7726  tmp_buf);
7727 #endif
7728 }
7729 
7781 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor,
7782  const uint16_t in_tensor_dim,
7783  const q7_t *ker_weight,
7784  const uint16_t out_tensor_ch,
7785  const uint16_t ker_dim,
7786  const uint16_t pad,
7787  const uint16_t stride,
7788  const q7_t *bias,
7789  const uint16_t bias_lshift,
7790  const uint16_t out_rshift,
7791  q7_t *out_tensor,
7792  const uint16_t out_tensor_dim,
7793  q15_t *in_tmp_buf,
7794  q15_t *wt_tmp_buf)
7795 {
7796 #if defined(__zcc__)
7797  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7798  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7799  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7800  wt_tmp_buf);
7801 #else
7802  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(
7803  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
7804  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim, in_tmp_buf,
7805  wt_tmp_buf);
7806 #endif
7807 }
7808 
7809 
7860 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor,
7861  const uint16_t in_tensor_dim,
7862  const uint16_t in_tensor_ch,
7863  const q7_t *ker_weight,
7864  const uint16_t out_tensor_ch,
7865  const uint16_t ker_dim,
7866  const uint16_t pad,
7867  const uint16_t stride,
7868  const q7_t *bias,
7869  const uint16_t bias_lshift,
7870  const uint16_t out_rshift,
7871  q7_t *out_tensor,
7872  const uint16_t out_tensor_dim,
7873  q15_t *in_tmp_buf,
7874  q7_t *tmp_buf)
7875 {
7876 #if defined(__zcc__)
7877  return tpt_nn_conv_HWC_s8_s8_s8_sft_bias(
7878  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7879  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
7880  out_tensor_dim, in_tmp_buf, tmp_buf);
7881 #else
7882  return riscv_nn_conv_HWC_s8_s8_s8_sft_bias(
7883  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
7884  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
7885  out_tensor_dim, in_tmp_buf, tmp_buf);
7886 #endif
7887 }
7888 
7950 static inline void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor,
7951  const uint16_t in_tensor_dim_x,
7952  const uint16_t in_tensor_dim_y,
7953  const uint16_t in_tensor_ch,
7954  const q7_t *ker_weight,
7955  const uint16_t out_tensor_ch,
7956  const uint16_t ker_dim_x,
7957  const uint16_t ker_dim_y,
7958  const uint16_t pad_x,
7959  const uint16_t pad_y,
7960  const uint16_t stride_x,
7961  const uint16_t stride_y,
7962  const q7_t *bias,
7963  const uint16_t bias_lshift,
7964  const uint16_t out_rshift,
7965  q7_t *out_tensor,
7966  const uint16_t out_tensor_dim_x,
7967  const uint16_t out_tensor_dim_y,
7968  q15_t *in_tmp_buf,
7969  q7_t *tmp_buf)
7970 {
7971 #if defined(__zcc__)
7972  tpt_nn_conv_HWC_s8_s8_s8_sft_bias_any(
7973  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7974  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7975  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
7976  out_tensor_dim_y, in_tmp_buf, tmp_buf);
7977 #else
7978  riscv_nn_conv_HWC_s8_s8_s8_sft_bias_any(
7979  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
7980  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
7981  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
7982  out_tensor_dim_y, in_tmp_buf, tmp_buf);
7983 #endif
7984 }
7985 
8038 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor,
8039  const uint16_t in_tensor_dim,
8040  const uint16_t in_tensor_ch,
8041  const q7_t *ker_weight,
8042  const uint16_t out_tensor_ch,
8043  const uint16_t ker_dim,
8044  const uint16_t pad,
8045  const uint16_t stride,
8046  const q7_t *bias,
8047  const uint16_t bias_lshift,
8048  const uint16_t out_rshift,
8049  q7_t *out_tensor,
8050  const uint16_t out_tensor_dim,
8051  q15_t *in_tmp_buf,
8052  q7_t *tmp_buf)
8053 {
8054 #if defined(__zcc__)
8055  return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8056  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8057  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8058  out_tensor_dim, in_tmp_buf, tmp_buf);
8059 #else
8060  return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast(
8061  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8062  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8063  out_tensor_dim, in_tmp_buf, tmp_buf);
8064 #endif
8065 }
8066 
8133 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor,
8134  const uint16_t in_tensor_dim_x,
8135  const uint16_t in_tensor_dim_y,
8136  const uint16_t in_tensor_ch,
8137  const q7_t *ker_weight,
8138  const uint16_t out_tensor_ch,
8139  const uint16_t ker_dim_x,
8140  const uint16_t ker_dim_y,
8141  const uint16_t pad_x,
8142  const uint16_t pad_y,
8143  const uint16_t stride_x,
8144  const uint16_t stride_y,
8145  const q7_t *bias,
8146  const uint16_t bias_lshift,
8147  const uint16_t out_rshift,
8148  q7_t *out_tensor,
8149  const uint16_t out_tensor_dim_x,
8150  const uint16_t out_tensor_dim_y,
8151  q15_t *in_tmp_buf,
8152  q7_t *tmp_buf)
8153 {
8154 #if defined(__zcc__)
8155  return tpt_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8156  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8157  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8158  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8159  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8160 #else
8161  return riscv_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(
8162  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8163  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8164  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8165  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8166 #endif
8167 }
8168 
8169 
8220 static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor,
8221  const uint16_t in_tensor_dim,
8222  const uint16_t in_tensor_ch,
8223  const q15_t *ker_weight,
8224  const uint16_t out_tensor_ch,
8225  const uint16_t ker_dim,
8226  const uint16_t pad,
8227  const uint16_t stride,
8228  const q15_t *bias,
8229  const uint16_t bias_lshift,
8230  const uint16_t out_rshift,
8231  q15_t *out_tensor,
8232  const uint16_t out_tensor_dim,
8233  q15_t *in_tmp_buf,
8234  q7_t *tmp_buf)
8235 {
8236 #if defined(__zcc__)
8237  return tpt_nn_conv_HWC_s16_s16_s16_sft_bias(
8238  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8239  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8240  out_tensor_dim, in_tmp_buf, tmp_buf);
8241 #else
8242  return riscv_nn_conv_HWC_s16_s16_s16_sft_bias(
8243  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8244  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8245  out_tensor_dim, in_tmp_buf, tmp_buf);
8246 #endif
8247 }
8248 
8301 static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor,
8302  const uint16_t in_tensor_dim,
8303  const uint16_t in_tensor_ch,
8304  const q15_t *ker_weight,
8305  const uint16_t out_tensor_ch,
8306  const uint16_t ker_dim,
8307  const uint16_t pad,
8308  const uint16_t stride,
8309  const q15_t *bias,
8310  const uint16_t bias_lshift,
8311  const uint16_t out_rshift,
8312  q15_t *out_tensor,
8313  const uint16_t out_tensor_dim,
8314  q15_t *in_tmp_buf,
8315  q7_t *tmp_buf)
8316 {
8317 #if defined(__zcc__)
8318  return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8319  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8320  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8321  out_tensor_dim, in_tmp_buf, tmp_buf);
8322 #else
8323  return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast(
8324  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8325  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8326  out_tensor_dim, in_tmp_buf, tmp_buf);
8327 #endif
8328 }
8329 
8396 static inline int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor,
8397  const uint16_t in_tensor_dim_x,
8398  const uint16_t in_tensor_dim_y,
8399  const uint16_t in_tensor_ch,
8400  const q15_t *ker_weight,
8401  const uint16_t out_tensor_ch,
8402  const uint16_t ker_dim_x,
8403  const uint16_t ker_dim_y,
8404  const uint16_t pad_x,
8405  const uint16_t pad_y,
8406  const uint16_t stride_x,
8407  const uint16_t stride_y,
8408  const q15_t *bias,
8409  const uint16_t bias_lshift,
8410  const uint16_t out_rshift,
8411  q15_t *out_tensor,
8412  const uint16_t out_tensor_dim_x,
8413  const uint16_t out_tensor_dim_y,
8414  q15_t *in_tmp_buf,
8415  q7_t *tmp_buf)
8416 {
8417 #if defined(__zcc__)
8418  return tpt_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8419  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8420  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8421  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8422  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8423 #else
8424  return riscv_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(
8425  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8426  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8427  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8428  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8429 #endif
8430 }
8431 
8483 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor,
8484  const uint16_t in_tensor_dim,
8485  const uint16_t in_tensor_ch,
8486  const q7_t *ker_weight,
8487  const uint16_t out_tensor_ch,
8488  const uint16_t ker_dim,
8489  const uint16_t pad,
8490  const uint16_t stride,
8491  const q7_t *bias,
8492  const uint16_t bias_lshift,
8493  const uint16_t out_rshift,
8494  q7_t *out_tensor,
8495  const uint16_t out_tensor_dim,
8496  q15_t *in_tmp_buf,
8497  q7_t *tmp_buf)
8498 {
8499 #if defined(__zcc__)
8500  return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8501  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8502  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8503  out_tensor_dim, in_tmp_buf, tmp_buf);
8504 #else
8505  return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias(
8506  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
8507  ker_dim, pad, stride, bias, bias_lshift, out_rshift, out_tensor,
8508  out_tensor_dim, in_tmp_buf, tmp_buf);
8509 #endif
8510 }
8511 
8574 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor,
8575  const uint16_t in_tensor_dim_x,
8576  const uint16_t in_tensor_dim_y,
8577  const uint16_t in_tensor_ch,
8578  const q7_t *ker_weight,
8579  const uint16_t out_tensor_ch,
8580  const uint16_t ker_dim_x,
8581  const uint16_t ker_dim_y,
8582  const uint16_t pad_x,
8583  const uint16_t pad_y,
8584  const uint16_t stride_x,
8585  const uint16_t stride_y,
8586  const q7_t *bias,
8587  const uint16_t bias_lshift,
8588  const uint16_t out_rshift,
8589  q7_t *out_tensor,
8590  const uint16_t out_tensor_dim_x,
8591  const uint16_t out_tensor_dim_y,
8592  q15_t *in_tmp_buf,
8593  q7_t *tmp_buf)
8594 {
8595 #if defined(__zcc__)
8596  return tpt_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8597  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8598  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8599  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8600  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8601 #else
8602  return riscv_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(
8603  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8604  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8605  bias, bias_lshift, out_rshift, out_tensor, out_tensor_dim_x,
8606  out_tensor_dim_y, in_tmp_buf, tmp_buf);
8607 #endif
8608 }
8609 
8655 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor,
8656  const uint16_t in_tensor_dim_x,
8657  const uint16_t in_tensor_dim_y,
8658  const uint16_t in_tensor_ch,
8659  const q7_t *ker_weight,
8660  const uint16_t out_tensor_ch,
8661  const uint16_t ker_dim_x,
8662  const uint16_t ker_dim_y,
8663  const uint16_t pad_x,
8664  const uint16_t pad_y,
8665  const uint16_t stride_x,
8666  const uint16_t stride_y,
8667  const q31_t *bias,
8668  const uint16_t pre_rshift,
8669  const uint16_t out_scale,
8670  const uint16_t post_rshift,
8671  q7_t *out_tensor,
8672  const uint16_t out_tensor_dim_x,
8673  const uint16_t out_tensor_dim_y,
8674  q15_t *in_tmp_buf)
8675 {
8676 #if defined(__zcc__)
8677 
8678 tpt_nn_conv_1x1_sym_params S1 = {stride_x, stride_y, pad_x, pad_y, pre_rshift, out_scale, post_rshift};
8679 tpt_nn_1x1_sym_dims S2 = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x, ker_dim_y,
8680  out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
8681  return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8682  out_tensor_ch, in_tensor, ker_weight, bias, &S1, &S2, in_tmp_buf);
8683 
8684 #else
8685  return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(
8686  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8687  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8688  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8689  out_tensor_dim_y, in_tmp_buf);
8690 #endif
8691 }
8692 
8739 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor,
8740  const uint16_t in_tensor_dim_x,
8741  const uint16_t in_tensor_dim_y,
8742  const uint16_t in_tensor_ch,
8743  const q7_t *ker_weight,
8744  const uint16_t out_tensor_ch,
8745  const uint16_t ker_dim_x,
8746  const uint16_t ker_dim_y,
8747  const uint16_t pad_x,
8748  const uint16_t pad_y,
8749  const uint16_t stride_x,
8750  const uint16_t stride_y,
8751  const q31_t *bias,
8752  const uint16_t pre_rshift,
8753  const uint16_t out_scale,
8754  const uint16_t post_rshift,
8755  q15_t *out_tensor,
8756  const uint16_t out_tensor_dim_x,
8757  const uint16_t out_tensor_dim_y,
8758  q15_t *in_tmp_buf)
8759 {
8760 #if defined(__zcc__)
8761  return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8762  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8763  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8764  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8765  out_tensor_dim_y, in_tmp_buf);
8766 #else
8767  return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(
8768  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8769  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8770  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8771  out_tensor_dim_y, in_tmp_buf);
8772 #endif
8773 }
8774 
8820 static inline int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor,
8821  const uint16_t in_tensor_dim_x,
8822  const uint16_t in_tensor_dim_y,
8823  const uint16_t in_tensor_ch,
8824  const q7_t *ker_weight,
8825  const uint16_t out_tensor_ch,
8826  const uint16_t ker_dim_x,
8827  const uint16_t ker_dim_y,
8828  const uint16_t pad_x,
8829  const uint16_t pad_y,
8830  const uint16_t stride_x,
8831  const uint16_t stride_y,
8832  const q31_t *bias,
8833  const uint16_t pre_rshift,
8834  const uint16_t out_scale,
8835  const uint16_t post_rshift,
8836  u8_t *out_tensor,
8837  const uint16_t out_tensor_dim_x,
8838  const uint16_t out_tensor_dim_y,
8839  q15_t *in_tmp_buf)
8840 {
8841 #if defined(__zcc__)
8842  return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8843  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8844  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8845  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8846  out_tensor_dim_y, in_tmp_buf);
8847 #else
8848  return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(
8849  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8850  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8851  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8852  out_tensor_dim_y, in_tmp_buf);
8853 #endif
8854 }
8855 
8902 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor,
8903  const uint16_t in_tensor_dim_x,
8904  const uint16_t in_tensor_dim_y,
8905  const uint16_t in_tensor_ch,
8906  const q7_t *ker_weight,
8907  const uint16_t out_tensor_ch,
8908  const uint16_t ker_dim_x,
8909  const uint16_t ker_dim_y,
8910  const uint16_t pad_x,
8911  const uint16_t pad_y,
8912  const uint16_t stride_x,
8913  const uint16_t stride_y,
8914  const q31_t *bias,
8915  const uint16_t pre_rshift,
8916  const uint16_t out_scale,
8917  const uint16_t post_rshift,
8918  q7_t *out_tensor,
8919  const uint16_t out_tensor_dim_x,
8920  const uint16_t out_tensor_dim_y,
8921  q15_t *in_tmp_buf)
8922 {
8923 #if defined(__zcc__)
8924  return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8925  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8926  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8927  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8928  out_tensor_dim_y, in_tmp_buf);
8929 #else
8930  return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(
8931  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
8932  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
8933  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
8934  out_tensor_dim_y, in_tmp_buf);
8935 #endif
8936 }
8937 
8984 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor,
8985  const uint16_t in_tensor_dim_x,
8986  const uint16_t in_tensor_dim_y,
8987  const uint16_t in_tensor_ch,
8988  const q7_t *ker_weight,
8989  const uint16_t out_tensor_ch,
8990  const uint16_t ker_dim_x,
8991  const uint16_t ker_dim_y,
8992  const uint16_t pad_x,
8993  const uint16_t pad_y,
8994  const uint16_t stride_x,
8995  const uint16_t stride_y,
8996  const q31_t *bias,
8997  const uint16_t pre_rshift,
8998  const uint16_t out_scale,
8999  const uint16_t post_rshift,
9000  q15_t *out_tensor,
9001  const uint16_t out_tensor_dim_x,
9002  const uint16_t out_tensor_dim_y,
9003  q15_t *in_tmp_buf)
9004 {
9005 #if defined(__zcc__)
9006  return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9007  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9008  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9009  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9010  out_tensor_dim_y, in_tmp_buf);
9011 #else
9012  return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(
9013  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9014  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9015  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9016  out_tensor_dim_y, in_tmp_buf);
9017 #endif
9018 }
9019 
9064 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor,
9065  const uint16_t in_tensor_dim_x,
9066  const uint16_t in_tensor_dim_y,
9067  const uint16_t in_tensor_ch,
9068  const q7_t *ker_weight,
9069  const uint16_t out_tensor_ch,
9070  const uint16_t ker_dim_x,
9071  const uint16_t ker_dim_y,
9072  const uint16_t pad_x,
9073  const uint16_t pad_y,
9074  const uint16_t stride_x,
9075  const uint16_t stride_y,
9076  const uint16_t pre_rshift,
9077  const uint16_t out_scale,
9078  const uint16_t post_rshift,
9079  q7_t *out_tensor,
9080  const uint16_t out_tensor_dim_x,
9081  const uint16_t out_tensor_dim_y,
9082  q15_t *in_tmp_buf)
9083 {
9084 #if defined(__zcc__)
9085  return tpt_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9086  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9087  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9088  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9089  out_tensor_dim_y, in_tmp_buf);
9090 #else
9091  return riscv_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(
9092  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9093  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9094  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9095  out_tensor_dim_y, in_tmp_buf);
9096 #endif
9097 }
9098 
9144 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor,
9145  const uint16_t in_tensor_dim_x,
9146  const uint16_t in_tensor_dim_y,
9147  const uint16_t in_tensor_ch,
9148  const q7_t *ker_weight,
9149  const uint16_t out_tensor_ch,
9150  const uint16_t ker_dim_x,
9151  const uint16_t ker_dim_y,
9152  const uint16_t pad_x,
9153  const uint16_t pad_y,
9154  const uint16_t stride_x,
9155  const uint16_t stride_y,
9156  const uint16_t pre_rshift,
9157  const uint16_t out_scale,
9158  const uint16_t post_rshift,
9159  q15_t *out_tensor,
9160  const uint16_t out_tensor_dim_x,
9161  const uint16_t out_tensor_dim_y,
9162  q15_t *in_tmp_buf)
9163 {
9164 #if defined(__zcc__)
9165  return tpt_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9166  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9167  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9168  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9169  out_tensor_dim_y, in_tmp_buf);
9170 #else
9171  return riscv_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(
9172  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9173  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9174  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9175  out_tensor_dim_y, in_tmp_buf);
9176 #endif
9177 }
9178 
9223 static inline int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor,
9224  const uint16_t in_tensor_dim_x,
9225  const uint16_t in_tensor_dim_y,
9226  const uint16_t in_tensor_ch,
9227  const q7_t *ker_weight,
9228  const uint16_t out_tensor_ch,
9229  const uint16_t ker_dim_x,
9230  const uint16_t ker_dim_y,
9231  const uint16_t pad_x,
9232  const uint16_t pad_y,
9233  const uint16_t stride_x,
9234  const uint16_t stride_y,
9235  const uint16_t pre_rshift,
9236  const uint16_t out_scale,
9237  const uint16_t post_rshift,
9238  u8_t *out_tensor,
9239  const uint16_t out_tensor_dim_x,
9240  const uint16_t out_tensor_dim_y,
9241  q15_t *in_tmp_buf)
9242 {
9243 #if defined(__zcc__)
9244  return tpt_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9245  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9246  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9247  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9248  out_tensor_dim_y, in_tmp_buf);
9249 #else
9250  return riscv_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(
9251  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9252  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9253  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9254  out_tensor_dim_y, in_tmp_buf);
9255 #endif
9256 }
9257 
9303 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor,
9304  const uint16_t in_tensor_dim_x,
9305  const uint16_t in_tensor_dim_y,
9306  const uint16_t in_tensor_ch,
9307  const q7_t *ker_weight,
9308  const uint16_t out_tensor_ch,
9309  const uint16_t ker_dim_x,
9310  const uint16_t ker_dim_y,
9311  const uint16_t pad_x,
9312  const uint16_t pad_y,
9313  const uint16_t stride_x,
9314  const uint16_t stride_y,
9315  const uint16_t pre_rshift,
9316  const uint16_t out_scale,
9317  const uint16_t post_rshift,
9318  q7_t *out_tensor,
9319  const uint16_t out_tensor_dim_x,
9320  const uint16_t out_tensor_dim_y,
9321  q15_t *in_tmp_buf)
9322 {
9323 #if defined(__zcc__)
9324  return tpt_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9325  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9326  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9327  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9328  out_tensor_dim_y, in_tmp_buf);
9329 #else
9330  return riscv_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(
9331  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9332  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9333  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9334  out_tensor_dim_y, in_tmp_buf);
9335 #endif
9336 }
9337 
9383 static inline int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor,
9384  const uint16_t in_tensor_dim_x,
9385  const uint16_t in_tensor_dim_y,
9386  const uint16_t in_tensor_ch,
9387  const q7_t *ker_weight,
9388  const uint16_t out_tensor_ch,
9389  const uint16_t ker_dim_x,
9390  const uint16_t ker_dim_y,
9391  const uint16_t pad_x,
9392  const uint16_t pad_y,
9393  const uint16_t stride_x,
9394  const uint16_t stride_y,
9395  const uint16_t pre_rshift,
9396  const uint16_t out_scale,
9397  const uint16_t post_rshift,
9398  q15_t *out_tensor,
9399  const uint16_t out_tensor_dim_x,
9400  const uint16_t out_tensor_dim_y,
9401  q15_t *in_tmp_buf)
9402 {
9403 #if defined(__zcc__)
9404  return tpt_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9405  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9406  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9407  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9408  out_tensor_dim_y, in_tmp_buf);
9409 #else
9410  return riscv_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(
9411  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
9412  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
9413  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
9414  out_tensor_dim_y, in_tmp_buf);
9415 #endif
9416 }
9417 
9449 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor,
9450  const uint16_t in_tensor_dim,
9451  const q7_t *ker_weight,
9452  const uint16_t out_tensor_ch,
9453  const uint16_t ker_dim,
9454  const uint16_t pad,
9455  const uint16_t stride,
9456  const q31_t *bias,
9457  const uint16_t pre_rshift,
9458  const uint16_t out_scale,
9459  const uint16_t post_rshift,
9460  q7_t *out_tensor,
9461  const uint16_t out_tensor_dim,
9462  q15_t *in_tmp_buf,
9463  q15_t *wt_tmp_buf)
9464 {
9465 #if defined(__zcc__)
9466  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9467  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9468  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9469  in_tmp_buf, wt_tmp_buf);
9470 #else
9471  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(
9472  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9473  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9474  in_tmp_buf, wt_tmp_buf);
9475 #endif
9476 }
9477 
9510 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor,
9511  const uint16_t in_tensor_dim,
9512  const q7_t *ker_weight,
9513  const uint16_t out_tensor_ch,
9514  const uint16_t ker_dim,
9515  const uint16_t pad,
9516  const uint16_t stride,
9517  const q31_t *bias,
9518  const uint16_t pre_rshift,
9519  const uint16_t out_scale,
9520  const uint16_t post_rshift,
9521  q15_t *out_tensor,
9522  const uint16_t out_tensor_dim,
9523  q15_t *in_tmp_buf,
9524  q15_t *wt_tmp_buf)
9525 {
9526 #if defined(__zcc__)
9527  return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9528  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9529  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9530  in_tmp_buf, wt_tmp_buf);
9531 #else
9532  return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(
9533  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9534  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9535  in_tmp_buf, wt_tmp_buf);
9536 #endif
9537 }
9538 
9570 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
9571  const uint16_t in_tensor_dim,
9572  const q7_t *ker_weight,
9573  const uint16_t out_tensor_ch,
9574  const uint16_t ker_dim,
9575  const uint16_t pad,
9576  const uint16_t stride,
9577  const q31_t *bias,
9578  const uint16_t pre_rshift,
9579  const uint16_t out_scale,
9580  const uint16_t post_rshift,
9581  u8_t *out_tensor,
9582  const uint16_t out_tensor_dim,
9583  q15_t *in_tmp_buf,
9584  q15_t *wt_tmp_buf)
9585 {
9586 #if defined(__zcc__)
9587  return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9588  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9589  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9590  in_tmp_buf, wt_tmp_buf);
9591 #else
9592  return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(
9593  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9594  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9595  in_tmp_buf, wt_tmp_buf);
9596 #endif
9597 }
9598 
9630 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
9631  const uint16_t in_tensor_dim,
9632  const q7_t *ker_weight,
9633  const uint16_t out_tensor_ch,
9634  const uint16_t ker_dim,
9635  const uint16_t pad,
9636  const uint16_t stride,
9637  const q31_t *bias,
9638  const uint16_t pre_rshift,
9639  const uint16_t out_scale,
9640  const uint16_t post_rshift,
9641  q7_t *out_tensor,
9642  const uint16_t out_tensor_dim,
9643  q15_t *in_tmp_buf,
9644  q15_t *wt_tmp_buf)
9645 {
9646 #if defined(__zcc__)
9647  return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9648  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9649  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9650  in_tmp_buf, wt_tmp_buf);
9651 #else
9652  return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(
9653  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9654  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9655  in_tmp_buf, wt_tmp_buf);
9656 #endif
9657 }
9658 
9691 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor,
9692  const uint16_t in_tensor_dim,
9693  const q7_t *ker_weight,
9694  const uint16_t out_tensor_ch,
9695  const uint16_t ker_dim,
9696  const uint16_t pad,
9697  const uint16_t stride,
9698  const q31_t *bias,
9699  const uint16_t pre_rshift,
9700  const uint16_t out_scale,
9701  const uint16_t post_rshift,
9702  q15_t *out_tensor,
9703  const uint16_t out_tensor_dim,
9704  q15_t *in_tmp_buf,
9705  q15_t *wt_tmp_buf)
9706 {
9707 #if defined(__zcc__)
9708  return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9709  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9710  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9711  in_tmp_buf, wt_tmp_buf);
9712 #else
9713  return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(
9714  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9715  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9716  in_tmp_buf, wt_tmp_buf);
9717 #endif
9718 }
9719 
9750 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor,
9751  const uint16_t in_tensor_dim,
9752  const q7_t *ker_weight,
9753  const uint16_t out_tensor_ch,
9754  const uint16_t ker_dim,
9755  const uint16_t pad,
9756  const uint16_t stride,
9757  const uint16_t pre_rshift,
9758  const uint16_t out_scale,
9759  const uint16_t post_rshift,
9760  q7_t *out_tensor,
9761  const uint16_t out_tensor_dim,
9762  q15_t *in_tmp_buf,
9763  q15_t *wt_tmp_buf)
9764 {
9765 #if defined(__zcc__)
9766  return tpt_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9767  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9768  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9769  in_tmp_buf, wt_tmp_buf);
9770 #else
9771  return riscv_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(
9772  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9773  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9774  in_tmp_buf, wt_tmp_buf);
9775 #endif
9776 }
9777 
9808 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor,
9809  const uint16_t in_tensor_dim,
9810  const q7_t *ker_weight,
9811  const uint16_t out_tensor_ch,
9812  const uint16_t ker_dim,
9813  const uint16_t pad,
9814  const uint16_t stride,
9815  const uint16_t pre_rshift,
9816  const uint16_t out_scale,
9817  const uint16_t post_rshift,
9818  q15_t *out_tensor,
9819  const uint16_t out_tensor_dim,
9820  q15_t *in_tmp_buf,
9821  q15_t *wt_tmp_buf)
9822 {
9823 #if defined(__zcc__)
9824  return tpt_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9825  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9826  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9827  in_tmp_buf, wt_tmp_buf);
9828 #else
9829  return riscv_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(
9830  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9831  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9832  in_tmp_buf, wt_tmp_buf);
9833 #endif
9834 }
9835 
9866 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor,
9867  const uint16_t in_tensor_dim,
9868  const q7_t *ker_weight,
9869  const uint16_t out_tensor_ch,
9870  const uint16_t ker_dim,
9871  const uint16_t pad,
9872  const uint16_t stride,
9873  const uint16_t pre_rshift,
9874  const uint16_t out_scale,
9875  const uint16_t post_rshift,
9876  u8_t *out_tensor,
9877  const uint16_t out_tensor_dim,
9878  q15_t *in_tmp_buf,
9879  q15_t *wt_tmp_buf)
9880 {
9881 #if defined(__zcc__)
9882  return tpt_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9883  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9884  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9885  in_tmp_buf, wt_tmp_buf);
9886 #else
9887  return riscv_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(
9888  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9889  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9890  in_tmp_buf, wt_tmp_buf);
9891 #endif
9892 }
9893 
9924 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor,
9925  const uint16_t in_tensor_dim,
9926  const q7_t *ker_weight,
9927  const uint16_t out_tensor_ch,
9928  const uint16_t ker_dim,
9929  const uint16_t pad,
9930  const uint16_t stride,
9931  const uint16_t pre_rshift,
9932  const uint16_t out_scale,
9933  const uint16_t post_rshift,
9934  q7_t *out_tensor,
9935  const uint16_t out_tensor_dim,
9936  q15_t *in_tmp_buf,
9937  q15_t *wt_tmp_buf)
9938 {
9939 #if defined(__zcc__)
9940  return tpt_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9941  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9942  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9943  in_tmp_buf, wt_tmp_buf);
9944 #else
9945  return riscv_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(
9946  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
9947  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
9948  in_tmp_buf, wt_tmp_buf);
9949 #endif
9950 }
9951 
9982 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor,
9983  const uint16_t in_tensor_dim,
9984  const q7_t *ker_weight,
9985  const uint16_t out_tensor_ch,
9986  const uint16_t ker_dim,
9987  const uint16_t pad,
9988  const uint16_t stride,
9989  const uint16_t pre_rshift,
9990  const uint16_t out_scale,
9991  const uint16_t post_rshift,
9992  q15_t *out_tensor,
9993  const uint16_t out_tensor_dim,
9994  q15_t *in_tmp_buf,
9995  q15_t *wt_tmp_buf)
9996 {
9997 #if defined(__zcc__)
9998  return tpt_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
9999  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10000  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10001  in_tmp_buf, wt_tmp_buf);
10002 #else
10003  return riscv_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(
10004  in_tensor, in_tensor_dim, ker_weight, out_tensor_ch, ker_dim, pad, stride,
10005  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim,
10006  in_tmp_buf, wt_tmp_buf);
10007 #endif
10008 }
10009 
10040 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor,
10041  const uint16_t in_tensor_dim,
10042  const uint16_t in_tensor_ch,
10043  const q7_t *ker_weight,
10044  const uint16_t out_tensor_ch,
10045  const uint16_t ker_dim,
10046  const uint16_t pad,
10047  const uint16_t stride,
10048  const q31_t *bias,
10049  const uint16_t pre_rshift,
10050  const uint16_t out_scale,
10051  const uint16_t post_rshift,
10052  q7_t *out_tensor,
10053  const uint16_t out_tensor_dim,
10054  q15_t *in_tmp_buf)
10055 {
10056 #if defined(__zcc__)
10057  return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10058  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10059  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10060  out_tensor, out_tensor_dim, in_tmp_buf);
10061 #else
10062  return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast(
10063  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10064  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10065  out_tensor, out_tensor_dim, in_tmp_buf);
10066 #endif
10067 }
10068 
10099 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor,
10100  const uint16_t in_tensor_dim,
10101  const uint16_t in_tensor_ch,
10102  const q7_t *ker_weight,
10103  const uint16_t out_tensor_ch,
10104  const uint16_t ker_dim,
10105  const uint16_t pad,
10106  const uint16_t stride,
10107  const q31_t *bias,
10108  const uint16_t pre_rshift,
10109  const uint16_t out_scale,
10110  const uint16_t post_rshift,
10111  q15_t *out_tensor,
10112  const uint16_t out_tensor_dim,
10113  q15_t *in_tmp_buf)
10114 {
10115 #if defined(__zcc__)
10116  return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10117  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10118  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10119  out_tensor, out_tensor_dim, in_tmp_buf);
10120 #else
10121  return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast(
10122  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10123  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10124  out_tensor, out_tensor_dim, in_tmp_buf);
10125 #endif
10126 }
10127 
10158 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor,
10159  const uint16_t in_tensor_dim,
10160  const uint16_t in_tensor_ch,
10161  const q7_t *ker_weight,
10162  const uint16_t out_tensor_ch,
10163  const uint16_t ker_dim,
10164  const uint16_t pad,
10165  const uint16_t stride,
10166  const q31_t *bias,
10167  const uint16_t pre_rshift,
10168  const uint16_t out_scale,
10169  const uint16_t post_rshift,
10170  u8_t *out_tensor,
10171  const uint16_t out_tensor_dim,
10172  q15_t *in_tmp_buf)
10173 {
10174 #if defined(__zcc__)
10175  return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10176  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10177  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10178  out_tensor, out_tensor_dim, in_tmp_buf);
10179 #else
10180  return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast(
10181  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10182  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10183  out_tensor, out_tensor_dim, in_tmp_buf);
10184 #endif
10185 }
10186 
10217 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor,
10218  const uint16_t in_tensor_dim,
10219  const uint16_t in_tensor_ch,
10220  const q7_t *ker_weight,
10221  const uint16_t out_tensor_ch,
10222  const uint16_t ker_dim,
10223  const uint16_t pad,
10224  const uint16_t stride,
10225  const q31_t *bias,
10226  const uint16_t pre_rshift,
10227  const uint16_t out_scale,
10228  const uint16_t post_rshift,
10229  q7_t *out_tensor,
10230  const uint16_t out_tensor_dim,
10231  q15_t *in_tmp_buf)
10232 {
10233 #if defined(__zcc__)
10234  return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10235  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10236  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10237  out_tensor, out_tensor_dim, in_tmp_buf);
10238 #else
10239  return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast(
10240  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10241  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10242  out_tensor, out_tensor_dim, in_tmp_buf);
10243 #endif
10244 }
10245 
10276 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor,
10277  const uint16_t in_tensor_dim,
10278  const uint16_t in_tensor_ch,
10279  const q7_t *ker_weight,
10280  const uint16_t out_tensor_ch,
10281  const uint16_t ker_dim,
10282  const uint16_t pad,
10283  const uint16_t stride,
10284  const q31_t *bias,
10285  const uint16_t pre_rshift,
10286  const uint16_t out_scale,
10287  const uint16_t post_rshift,
10288  q15_t *out_tensor,
10289  const uint16_t out_tensor_dim,
10290  q15_t *in_tmp_buf)
10291 {
10292 #if defined(__zcc__)
10293  return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10294  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10295  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10296  out_tensor, out_tensor_dim, in_tmp_buf);
10297 #else
10298  return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast(
10299  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10300  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
10301  out_tensor, out_tensor_dim, in_tmp_buf);
10302 #endif
10303 }
10304 
10334 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor,
10335  const uint16_t in_tensor_dim,
10336  const uint16_t in_tensor_ch,
10337  const q7_t *ker_weight,
10338  const uint16_t out_tensor_ch,
10339  const uint16_t ker_dim,
10340  const uint16_t pad,
10341  const uint16_t stride,
10342  const uint16_t pre_rshift,
10343  const uint16_t out_scale,
10344  const uint16_t post_rshift,
10345  q7_t *out_tensor,
10346  const uint16_t out_tensor_dim,
10347  q15_t *in_tmp_buf)
10348 {
10349 #if defined(__zcc__)
10350  return tpt_nn_conv_HWC_s8_s8_s8_sym_fast(
10351  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10352  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10353  out_tensor_dim, in_tmp_buf);
10354 #else
10355  return riscv_nn_conv_HWC_s8_s8_s8_sym_fast(
10356  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10357  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10358  out_tensor_dim, in_tmp_buf);
10359 #endif
10360 }
10361 
10391 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor,
10392  const uint16_t in_tensor_dim,
10393  const uint16_t in_tensor_ch,
10394  const q7_t *ker_weight,
10395  const uint16_t out_tensor_ch,
10396  const uint16_t ker_dim,
10397  const uint16_t pad,
10398  const uint16_t stride,
10399  const uint16_t pre_rshift,
10400  const uint16_t out_scale,
10401  const uint16_t post_rshift,
10402  q15_t *out_tensor,
10403  const uint16_t out_tensor_dim,
10404  q15_t *in_tmp_buf)
10405 {
10406 #if defined(__zcc__)
10407  return tpt_nn_conv_HWC_s8_s16_s8_sym_fast(
10408  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10409  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10410  out_tensor_dim, in_tmp_buf);
10411 #else
10412  return riscv_nn_conv_HWC_s8_s16_s8_sym_fast(
10413  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10414  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10415  out_tensor_dim, in_tmp_buf);
10416 #endif
10417 }
10418 
10448 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor,
10449  const uint16_t in_tensor_dim,
10450  const uint16_t in_tensor_ch,
10451  const q7_t *ker_weight,
10452  const uint16_t out_tensor_ch,
10453  const uint16_t ker_dim,
10454  const uint16_t pad,
10455  const uint16_t stride,
10456  const uint16_t pre_rshift,
10457  const uint16_t out_scale,
10458  const uint16_t post_rshift,
10459  u8_t *out_tensor,
10460  const uint16_t out_tensor_dim,
10461  q15_t *in_tmp_buf)
10462 {
10463 #if defined(__zcc__)
10464  return tpt_nn_conv_HWC_u8_u8_s8_sym_fast(
10465  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10466  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10467  out_tensor_dim, in_tmp_buf);
10468 #else
10469  return riscv_nn_conv_HWC_u8_u8_s8_sym_fast(
10470  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10471  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10472  out_tensor_dim, in_tmp_buf);
10473 #endif
10474 }
10475 
10505 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor,
10506  const uint16_t in_tensor_dim,
10507  const uint16_t in_tensor_ch,
10508  const q7_t *ker_weight,
10509  const uint16_t out_tensor_ch,
10510  const uint16_t ker_dim,
10511  const uint16_t pad,
10512  const uint16_t stride,
10513  const uint16_t pre_rshift,
10514  const uint16_t out_scale,
10515  const uint16_t post_rshift,
10516  q7_t *out_tensor,
10517  const uint16_t out_tensor_dim,
10518  q15_t *in_tmp_buf)
10519 {
10520 #if defined(__zcc__)
10521  return tpt_nn_conv_HWC_u8_s8_s8_sym_fast(
10522  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10523  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10524  out_tensor_dim, in_tmp_buf);
10525 #else
10526  return riscv_nn_conv_HWC_u8_s8_s8_sym_fast(
10527  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10528  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10529  out_tensor_dim, in_tmp_buf);
10530 #endif
10531 }
10532 
10562 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor,
10563  const uint16_t in_tensor_dim,
10564  const uint16_t in_tensor_ch,
10565  const q7_t *ker_weight,
10566  const uint16_t out_tensor_ch,
10567  const uint16_t ker_dim,
10568  const uint16_t pad,
10569  const uint16_t stride,
10570  const uint16_t pre_rshift,
10571  const uint16_t out_scale,
10572  const uint16_t post_rshift,
10573  q15_t *out_tensor,
10574  const uint16_t out_tensor_dim,
10575  q15_t *in_tmp_buf)
10576 {
10577 #if defined(__zcc__)
10578  return tpt_nn_conv_HWC_u8_s16_s8_sym_fast(
10579  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10580  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10581  out_tensor_dim, in_tmp_buf);
10582 #else
10583  return riscv_nn_conv_HWC_u8_s16_s8_sym_fast(
10584  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
10585  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
10586  out_tensor_dim, in_tmp_buf);
10587 #endif
10588 }
10589 
10626 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor,
10627  const uint16_t in_tensor_dim_x,
10628  const uint16_t in_tensor_dim_y,
10629  const uint16_t in_tensor_ch,
10630  const q7_t *ker_weight,
10631  const uint16_t out_tensor_ch,
10632  const uint16_t ker_dim_x,
10633  const uint16_t ker_dim_y,
10634  const uint16_t pad_x,
10635  const uint16_t pad_y,
10636  const uint16_t stride_x,
10637  const uint16_t stride_y,
10638  const q31_t *bias,
10639  const uint16_t pre_rshift,
10640  const uint16_t out_scale,
10641  const uint16_t post_rshift,
10642  q7_t *out_tensor,
10643  const uint16_t out_tensor_dim_x,
10644  const uint16_t out_tensor_dim_y,
10645  q15_t *in_tmp_buf)
10646 {
10647 #if defined(__zcc__)
10648  return tpt_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10649  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10650  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10651  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10652  out_tensor_dim_y, in_tmp_buf);
10653 #else
10654  return riscv_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(
10655  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10656  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10657  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10658  out_tensor_dim_y, in_tmp_buf);
10659 #endif
10660 }
10661 
10699 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor,
10700  const uint16_t in_tensor_dim_x,
10701  const uint16_t in_tensor_dim_y,
10702  const uint16_t in_tensor_ch,
10703  const q7_t *ker_weight,
10704  const uint16_t out_tensor_ch,
10705  const uint16_t ker_dim_x,
10706  const uint16_t ker_dim_y,
10707  const uint16_t pad_x,
10708  const uint16_t pad_y,
10709  const uint16_t stride_x,
10710  const uint16_t stride_y,
10711  const q31_t *bias,
10712  const uint16_t pre_rshift,
10713  const uint16_t out_scale,
10714  const uint16_t post_rshift,
10715  q15_t *out_tensor,
10716  const uint16_t out_tensor_dim_x,
10717  const uint16_t out_tensor_dim_y,
10718  q15_t *in_tmp_buf)
10719 {
10720 #if defined(__zcc__)
10721  return tpt_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10722  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10723  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10724  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10725  out_tensor_dim_y, in_tmp_buf);
10726 #else
10727  return riscv_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(
10728  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10729  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10730  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10731  out_tensor_dim_y, in_tmp_buf);
10732 #endif
10733 }
10734 
10771 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor,
10772  const uint16_t in_tensor_dim_x,
10773  const uint16_t in_tensor_dim_y,
10774  const uint16_t in_tensor_ch,
10775  const q7_t *ker_weight,
10776  const uint16_t out_tensor_ch,
10777  const uint16_t ker_dim_x,
10778  const uint16_t ker_dim_y,
10779  const uint16_t pad_x,
10780  const uint16_t pad_y,
10781  const uint16_t stride_x,
10782  const uint16_t stride_y,
10783  const q31_t *bias,
10784  const uint16_t pre_rshift,
10785  const uint16_t out_scale,
10786  const uint16_t post_rshift,
10787  u8_t *out_tensor,
10788  const uint16_t out_tensor_dim_x,
10789  const uint16_t out_tensor_dim_y,
10790  q15_t *in_tmp_buf)
10791 {
10792 #if defined(__zcc__)
10793  return tpt_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10794  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10795  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10796  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10797  out_tensor_dim_y, in_tmp_buf);
10798 #else
10799  return riscv_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(
10800  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10801  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10802  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10803  out_tensor_dim_y, in_tmp_buf);
10804 #endif
10805 }
10806 
10844 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor,
10845  const uint16_t in_tensor_dim_x,
10846  const uint16_t in_tensor_dim_y,
10847  const uint16_t in_tensor_ch,
10848  const q7_t *ker_weight,
10849  const uint16_t out_tensor_ch,
10850  const uint16_t ker_dim_x,
10851  const uint16_t ker_dim_y,
10852  const uint16_t pad_x,
10853  const uint16_t pad_y,
10854  const uint16_t stride_x,
10855  const uint16_t stride_y,
10856  const q31_t *bias,
10857  const uint16_t pre_rshift,
10858  const uint16_t out_scale,
10859  const uint16_t post_rshift,
10860  q7_t *out_tensor,
10861  const uint16_t out_tensor_dim_x,
10862  const uint16_t out_tensor_dim_y,
10863  q15_t *in_tmp_buf)
10864 {
10865 #if defined(__zcc__)
10866  return tpt_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10867  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10868  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10869  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10870  out_tensor_dim_y, in_tmp_buf);
10871 #else
10872  return riscv_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(
10873  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10874  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10875  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10876  out_tensor_dim_y, in_tmp_buf);
10877 #endif
10878 }
10879 
10917 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor,
10918  const uint16_t in_tensor_dim_x,
10919  const uint16_t in_tensor_dim_y,
10920  const uint16_t in_tensor_ch,
10921  const q7_t *ker_weight,
10922  const uint16_t out_tensor_ch,
10923  const uint16_t ker_dim_x,
10924  const uint16_t ker_dim_y,
10925  const uint16_t pad_x,
10926  const uint16_t pad_y,
10927  const uint16_t stride_x,
10928  const uint16_t stride_y,
10929  const q31_t *bias,
10930  const uint16_t pre_rshift,
10931  const uint16_t out_scale,
10932  const uint16_t post_rshift,
10933  q15_t *out_tensor,
10934  const uint16_t out_tensor_dim_x,
10935  const uint16_t out_tensor_dim_y,
10936  q15_t *in_tmp_buf)
10937 {
10938 #if defined(__zcc__)
10939  return tpt_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10940  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10941  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10942  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10943  out_tensor_dim_y, in_tmp_buf);
10944 #else
10945  return riscv_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(
10946  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
10947  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
10948  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
10949  out_tensor_dim_y, in_tmp_buf);
10950 #endif
10951 }
10952 
10988 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor,
10989  const uint16_t in_tensor_dim_x,
10990  const uint16_t in_tensor_dim_y,
10991  const uint16_t in_tensor_ch,
10992  const q7_t *ker_weight,
10993  const uint16_t out_tensor_ch,
10994  const uint16_t ker_dim_x,
10995  const uint16_t ker_dim_y,
10996  const uint16_t pad_x,
10997  const uint16_t pad_y,
10998  const uint16_t stride_x,
10999  const uint16_t stride_y,
11000  const uint16_t pre_rshift,
11001  const uint16_t out_scale,
11002  const uint16_t post_rshift,
11003  q7_t *out_tensor,
11004  const uint16_t out_tensor_dim_x,
11005  const uint16_t out_tensor_dim_y,
11006  q15_t *in_tmp_buf)
11007 {
11008 #if defined(__zcc__)
11009  return tpt_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11010  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11011  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11012  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11013  out_tensor_dim_y, in_tmp_buf);
11014 #else
11015  return riscv_nn_conv_HWC_s8_s8_s8_sym_fast_any(
11016  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11017  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11018  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11019  out_tensor_dim_y, in_tmp_buf);
11020 #endif
11021 }
11022 
11058 static inline int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor,
11059  const uint16_t in_tensor_dim_x,
11060  const uint16_t in_tensor_dim_y,
11061  const uint16_t in_tensor_ch,
11062  const q7_t *ker_weight,
11063  const uint16_t out_tensor_ch,
11064  const uint16_t ker_dim_x,
11065  const uint16_t ker_dim_y,
11066  const uint16_t pad_x,
11067  const uint16_t pad_y,
11068  const uint16_t stride_x,
11069  const uint16_t stride_y,
11070  const uint16_t pre_rshift,
11071  const uint16_t out_scale,
11072  const uint16_t post_rshift,
11073  q15_t *out_tensor,
11074  const uint16_t out_tensor_dim_x,
11075  const uint16_t out_tensor_dim_y,
11076  q15_t *in_tmp_buf)
11077 {
11078 #if defined(__zcc__)
11079  return tpt_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11080  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11081  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11082  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11083  out_tensor_dim_y, in_tmp_buf);
11084 #else
11085  return riscv_nn_conv_HWC_s8_s16_s8_sym_fast_any(
11086  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11087  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11088  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11089  out_tensor_dim_y, in_tmp_buf);
11090 #endif
11091 }
11092 
11128 static inline int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor,
11129  const uint16_t in_tensor_dim_x,
11130  const uint16_t in_tensor_dim_y,
11131  const uint16_t in_tensor_ch,
11132  const q7_t *ker_weight,
11133  const uint16_t out_tensor_ch,
11134  const uint16_t ker_dim_x,
11135  const uint16_t ker_dim_y,
11136  const uint16_t pad_x,
11137  const uint16_t pad_y,
11138  const uint16_t stride_x,
11139  const uint16_t stride_y,
11140  const uint16_t pre_rshift,
11141  const uint16_t out_scale,
11142  const uint16_t post_rshift,
11143  u8_t *out_tensor,
11144  const uint16_t out_tensor_dim_x,
11145  const uint16_t out_tensor_dim_y,
11146  q15_t *in_tmp_buf)
11147 {
11148 #if defined(__zcc__)
11149  return tpt_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11150  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11151  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11152  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11153  out_tensor_dim_y, in_tmp_buf);
11154 #else
11155  return riscv_nn_conv_HWC_u8_u8_s8_sym_fast_any(
11156  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11157  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11158  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11159  out_tensor_dim_y, in_tmp_buf);
11160 #endif
11161 }
11162 
11198 static inline int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor,
11199  const uint16_t in_tensor_dim_x,
11200  const uint16_t in_tensor_dim_y,
11201  const uint16_t in_tensor_ch,
11202  const q7_t *ker_weight,
11203  const uint16_t out_tensor_ch,
11204  const uint16_t ker_dim_x,
11205  const uint16_t ker_dim_y,
11206  const uint16_t pad_x,
11207  const uint16_t pad_y,
11208  const uint16_t stride_x,
11209  const uint16_t stride_y,
11210  const uint16_t pre_rshift,
11211  const uint16_t out_scale,
11212  const uint16_t post_rshift,
11213  q7_t *out_tensor,
11214  const uint16_t out_tensor_dim_x,
11215  const uint16_t out_tensor_dim_y,
11216  q15_t *in_tmp_buf)
11217 {
11218 #if defined(__zcc__)
11219  return tpt_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11220  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11221  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11222  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11223  out_tensor_dim_y, in_tmp_buf);
11224 #else
11225  return riscv_nn_conv_HWC_u8_s8_s8_sym_fast_any(
11226  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11227  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11228  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11229  out_tensor_dim_y, in_tmp_buf);
11230 #endif
11231 }
11232 
11268 static inline int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor,
11269  const uint16_t in_tensor_dim_x,
11270  const uint16_t in_tensor_dim_y,
11271  const uint16_t in_tensor_ch,
11272  const q7_t *ker_weight,
11273  const uint16_t out_tensor_ch,
11274  const uint16_t ker_dim_x,
11275  const uint16_t ker_dim_y,
11276  const uint16_t pad_x,
11277  const uint16_t pad_y,
11278  const uint16_t stride_x,
11279  const uint16_t stride_y,
11280  const uint16_t pre_rshift,
11281  const uint16_t out_scale,
11282  const uint16_t post_rshift,
11283  q15_t *out_tensor,
11284  const uint16_t out_tensor_dim_x,
11285  const uint16_t out_tensor_dim_y,
11286  q15_t *in_tmp_buf)
11287 {
11288 #if defined(__zcc__)
11289  return tpt_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11290  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11291  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11292  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11293  out_tensor_dim_y, in_tmp_buf);
11294 #else
11295  return riscv_nn_conv_HWC_u8_s16_s8_sym_fast_any(
11296  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11297  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11298  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11299  out_tensor_dim_y, in_tmp_buf);
11300 #endif
11301 }
11302 
11303 
11334 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor,
11335  const uint16_t in_tensor_dim,
11336  const uint16_t in_tensor_ch,
11337  const q7_t *ker_weight,
11338  const uint16_t out_tensor_ch,
11339  const uint16_t ker_dim,
11340  const uint16_t pad,
11341  const uint16_t stride,
11342  const q31_t *bias,
11343  const uint16_t pre_rshift,
11344  const uint16_t out_scale,
11345  const uint16_t post_rshift,
11346  q7_t *out_tensor,
11347  const uint16_t out_tensor_dim,
11348  q15_t *in_tmp_buf)
11349 {
11350 #if defined(__zcc__)
11351  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11352  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11353  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11354  out_tensor, out_tensor_dim, in_tmp_buf);
11355 #else
11356  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias(
11357  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11358  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11359  out_tensor, out_tensor_dim, in_tmp_buf);
11360 #endif
11361 }
11362 
11393 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor,
11394  const uint16_t in_tensor_dim,
11395  const uint16_t in_tensor_ch,
11396  const q7_t *ker_weight,
11397  const uint16_t out_tensor_ch,
11398  const uint16_t ker_dim,
11399  const uint16_t pad,
11400  const uint16_t stride,
11401  const q31_t *bias,
11402  const uint16_t pre_rshift,
11403  const uint16_t out_scale,
11404  const uint16_t post_rshift,
11405  q15_t *out_tensor,
11406  const uint16_t out_tensor_dim,
11407  q15_t *in_tmp_buf)
11408 {
11409 #if defined(__zcc__)
11410  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11411  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11412  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11413  out_tensor, out_tensor_dim, in_tmp_buf);
11414 #else
11415  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias(
11416  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11417  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11418  out_tensor, out_tensor_dim, in_tmp_buf);
11419 #endif
11420 }
11421 
11452 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor,
11453  const uint16_t in_tensor_dim,
11454  const uint16_t in_tensor_ch,
11455  const q7_t *ker_weight,
11456  const uint16_t out_tensor_ch,
11457  const uint16_t ker_dim,
11458  const uint16_t pad,
11459  const uint16_t stride,
11460  const q31_t *bias,
11461  const uint16_t pre_rshift,
11462  const uint16_t out_scale,
11463  const uint16_t post_rshift,
11464  u8_t *out_tensor,
11465  const uint16_t out_tensor_dim,
11466  q15_t *in_tmp_buf)
11467 {
11468 #if defined(__zcc__)
11469  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11470  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11471  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11472  out_tensor, out_tensor_dim, in_tmp_buf);
11473 #else
11474  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias(
11475  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11476  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11477  out_tensor, out_tensor_dim, in_tmp_buf);
11478 #endif
11479 }
11480 
11511 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor,
11512  const uint16_t in_tensor_dim,
11513  const uint16_t in_tensor_ch,
11514  const q7_t *ker_weight,
11515  const uint16_t out_tensor_ch,
11516  const uint16_t ker_dim,
11517  const uint16_t pad,
11518  const uint16_t stride,
11519  const q31_t *bias,
11520  const uint16_t pre_rshift,
11521  const uint16_t out_scale,
11522  const uint16_t post_rshift,
11523  q7_t *out_tensor,
11524  const uint16_t out_tensor_dim,
11525  q15_t *in_tmp_buf)
11526 {
11527 #if defined(__zcc__)
11528  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11529  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11530  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11531  out_tensor, out_tensor_dim, in_tmp_buf);
11532 #else
11533  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias(
11534  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11535  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11536  out_tensor, out_tensor_dim, in_tmp_buf);
11537 #endif
11538 }
11539 
11570 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor,
11571  const uint16_t in_tensor_dim,
11572  const uint16_t in_tensor_ch,
11573  const q7_t *ker_weight,
11574  const uint16_t out_tensor_ch,
11575  const uint16_t ker_dim,
11576  const uint16_t pad,
11577  const uint16_t stride,
11578  const q31_t *bias,
11579  const uint16_t pre_rshift,
11580  const uint16_t out_scale,
11581  const uint16_t post_rshift,
11582  q15_t *out_tensor,
11583  const uint16_t out_tensor_dim,
11584  q15_t *in_tmp_buf)
11585 {
11586 #if defined(__zcc__)
11587  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11588  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11589  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11590  out_tensor, out_tensor_dim, in_tmp_buf);
11591 #else
11592  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias(
11593  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11594  ker_dim, pad, stride, bias, pre_rshift, out_scale, post_rshift,
11595  out_tensor, out_tensor_dim, in_tmp_buf);
11596 #endif
11597 }
11598 
11628 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor,
11629  const uint16_t in_tensor_dim,
11630  const uint16_t in_tensor_ch,
11631  const q7_t *ker_weight,
11632  const uint16_t out_tensor_ch,
11633  const uint16_t ker_dim,
11634  const uint16_t pad,
11635  const uint16_t stride,
11636  const uint16_t pre_rshift,
11637  const uint16_t out_scale,
11638  const uint16_t post_rshift,
11639  q7_t *out_tensor,
11640  const uint16_t out_tensor_dim,
11641  q15_t *in_tmp_buf)
11642 {
11643 #if defined(__zcc__)
11644  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym(
11645  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11646  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11647  out_tensor_dim, in_tmp_buf);
11648 #else
11649  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym(
11650  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11651  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11652  out_tensor_dim, in_tmp_buf);
11653 #endif
11654 }
11655 
11685 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor,
11686  const uint16_t in_tensor_dim,
11687  const uint16_t in_tensor_ch,
11688  const q7_t *ker_weight,
11689  const uint16_t out_tensor_ch,
11690  const uint16_t ker_dim,
11691  const uint16_t pad,
11692  const uint16_t stride,
11693  const uint16_t pre_rshift,
11694  const uint16_t out_scale,
11695  const uint16_t post_rshift,
11696  q15_t *out_tensor,
11697  const uint16_t out_tensor_dim,
11698  q15_t *in_tmp_buf)
11699 {
11700 #if defined(__zcc__)
11701  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym(
11702  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11703  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11704  out_tensor_dim, in_tmp_buf);
11705 #else
11706  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym(
11707  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11708  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11709  out_tensor_dim, in_tmp_buf);
11710 #endif
11711 }
11712 
11742 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor,
11743  const uint16_t in_tensor_dim,
11744  const uint16_t in_tensor_ch,
11745  const q7_t *ker_weight,
11746  const uint16_t out_tensor_ch,
11747  const uint16_t ker_dim,
11748  const uint16_t pad,
11749  const uint16_t stride,
11750  const uint16_t pre_rshift,
11751  const uint16_t out_scale,
11752  const uint16_t post_rshift,
11753  u8_t *out_tensor,
11754  const uint16_t out_tensor_dim,
11755  q15_t *in_tmp_buf)
11756 {
11757 #if defined(__zcc__)
11758  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym(
11759  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11760  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11761  out_tensor_dim, in_tmp_buf);
11762 #else
11763  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym(
11764  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11765  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11766  out_tensor_dim, in_tmp_buf);
11767 #endif
11768 }
11769 
11799 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor,
11800  const uint16_t in_tensor_dim,
11801  const uint16_t in_tensor_ch,
11802  const q7_t *ker_weight,
11803  const uint16_t out_tensor_ch,
11804  const uint16_t ker_dim,
11805  const uint16_t pad,
11806  const uint16_t stride,
11807  const uint16_t pre_rshift,
11808  const uint16_t out_scale,
11809  const uint16_t post_rshift,
11810  q7_t *out_tensor,
11811  const uint16_t out_tensor_dim,
11812  q15_t *in_tmp_buf)
11813 {
11814 #if defined(__zcc__)
11815  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym(
11816  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11817  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11818  out_tensor_dim, in_tmp_buf);
11819 #else
11820  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym(
11821  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11822  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11823  out_tensor_dim, in_tmp_buf);
11824 #endif
11825 }
11826 
11856 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor,
11857  const uint16_t in_tensor_dim,
11858  const uint16_t in_tensor_ch,
11859  const q7_t *ker_weight,
11860  const uint16_t out_tensor_ch,
11861  const uint16_t ker_dim,
11862  const uint16_t pad,
11863  const uint16_t stride,
11864  const uint16_t pre_rshift,
11865  const uint16_t out_scale,
11866  const uint16_t post_rshift,
11867  q15_t *out_tensor,
11868  const uint16_t out_tensor_dim,
11869  q15_t *in_tmp_buf)
11870 {
11871 #if defined(__zcc__)
11872  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym(
11873  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11874  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11875  out_tensor_dim, in_tmp_buf);
11876 #else
11877  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym(
11878  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
11879  ker_dim, pad, stride, pre_rshift, out_scale, post_rshift, out_tensor,
11880  out_tensor_dim, in_tmp_buf);
11881 #endif
11882 }
11883 
11920 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor,
11921  const uint16_t in_tensor_dim_x,
11922  const uint16_t in_tensor_dim_y,
11923  const uint16_t in_tensor_ch,
11924  const q7_t *ker_weight,
11925  const uint16_t out_tensor_ch,
11926  const uint16_t ker_dim_x,
11927  const uint16_t ker_dim_y,
11928  const uint16_t pad_x,
11929  const uint16_t pad_y,
11930  const uint16_t stride_x,
11931  const uint16_t stride_y,
11932  const q31_t *bias,
11933  const uint16_t pre_rshift,
11934  const uint16_t out_scale,
11935  const uint16_t post_rshift,
11936  q7_t *out_tensor,
11937  const uint16_t out_tensor_dim_x,
11938  const uint16_t out_tensor_dim_y,
11939  q15_t *in_tmp_buf)
11940 {
11941 #if defined(__zcc__)
11942  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11943  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11944  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11945  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11946  out_tensor_dim_y, in_tmp_buf);
11947 #else
11948  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(
11949  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
11950  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
11951  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
11952  out_tensor_dim_y, in_tmp_buf);
11953 #endif
11954 }
11955 
11993 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor,
11994  const uint16_t in_tensor_dim_x,
11995  const uint16_t in_tensor_dim_y,
11996  const uint16_t in_tensor_ch,
11997  const q7_t *ker_weight,
11998  const uint16_t out_tensor_ch,
11999  const uint16_t ker_dim_x,
12000  const uint16_t ker_dim_y,
12001  const uint16_t pad_x,
12002  const uint16_t pad_y,
12003  const uint16_t stride_x,
12004  const uint16_t stride_y,
12005  const q31_t *bias,
12006  const uint16_t pre_rshift,
12007  const uint16_t out_scale,
12008  const uint16_t post_rshift,
12009  q15_t *out_tensor,
12010  const uint16_t out_tensor_dim_x,
12011  const uint16_t out_tensor_dim_y,
12012  q15_t *in_tmp_buf)
12013 {
12014 #if defined(__zcc__)
12015  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12016  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12017  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12018  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12019  out_tensor_dim_y, in_tmp_buf);
12020 #else
12021  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(
12022  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12023  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12024  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12025  out_tensor_dim_y, in_tmp_buf);
12026 #endif
12027 }
12028 
12065 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor,
12066  const uint16_t in_tensor_dim_x,
12067  const uint16_t in_tensor_dim_y,
12068  const uint16_t in_tensor_ch,
12069  const q7_t *ker_weight,
12070  const uint16_t out_tensor_ch,
12071  const uint16_t ker_dim_x,
12072  const uint16_t ker_dim_y,
12073  const uint16_t pad_x,
12074  const uint16_t pad_y,
12075  const uint16_t stride_x,
12076  const uint16_t stride_y,
12077  const q31_t *bias,
12078  const uint16_t pre_rshift,
12079  const uint16_t out_scale,
12080  const uint16_t post_rshift,
12081  u8_t *out_tensor,
12082  const uint16_t out_tensor_dim_x,
12083  const uint16_t out_tensor_dim_y,
12084  q15_t *in_tmp_buf)
12085 {
12086 #if defined(__zcc__)
12087  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12088  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12089  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12090  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12091  out_tensor_dim_y, in_tmp_buf);
12092 #else
12093  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(
12094  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12095  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12096  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12097  out_tensor_dim_y, in_tmp_buf);
12098 #endif
12099 }
12100 
12138 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor,
12139  const uint16_t in_tensor_dim_x,
12140  const uint16_t in_tensor_dim_y,
12141  const uint16_t in_tensor_ch,
12142  const q7_t *ker_weight,
12143  const uint16_t out_tensor_ch,
12144  const uint16_t ker_dim_x,
12145  const uint16_t ker_dim_y,
12146  const uint16_t pad_x,
12147  const uint16_t pad_y,
12148  const uint16_t stride_x,
12149  const uint16_t stride_y,
12150  const q31_t *bias,
12151  const uint16_t pre_rshift,
12152  const uint16_t out_scale,
12153  const uint16_t post_rshift,
12154  q7_t *out_tensor,
12155  const uint16_t out_tensor_dim_x,
12156  const uint16_t out_tensor_dim_y,
12157  q15_t *in_tmp_buf)
12158 {
12159 #if defined(__zcc__)
12160  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12161  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12162  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12163  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12164  out_tensor_dim_y, in_tmp_buf);
12165 #else
12166  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(
12167  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12168  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12169  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12170  out_tensor_dim_y, in_tmp_buf);
12171 #endif
12172 }
12173 
12211 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor,
12212  const uint16_t in_tensor_dim_x,
12213  const uint16_t in_tensor_dim_y,
12214  const uint16_t in_tensor_ch,
12215  const q7_t *ker_weight,
12216  const uint16_t out_tensor_ch,
12217  const uint16_t ker_dim_x,
12218  const uint16_t ker_dim_y,
12219  const uint16_t pad_x,
12220  const uint16_t pad_y,
12221  const uint16_t stride_x,
12222  const uint16_t stride_y,
12223  const q31_t *bias,
12224  const uint16_t pre_rshift,
12225  const uint16_t out_scale,
12226  const uint16_t post_rshift,
12227  q15_t *out_tensor,
12228  const uint16_t out_tensor_dim_x,
12229  const uint16_t out_tensor_dim_y,
12230  q15_t *in_tmp_buf)
12231 {
12232 #if defined(__zcc__)
12233  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12234  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12235  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12236  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12237  out_tensor_dim_y, in_tmp_buf);
12238 #else
12239  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(
12240  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12241  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12242  bias, pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12243  out_tensor_dim_y, in_tmp_buf);
12244 #endif
12245 }
12246 
12282 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor,
12283  const uint16_t in_tensor_dim_x,
12284  const uint16_t in_tensor_dim_y,
12285  const uint16_t in_tensor_ch,
12286  const q7_t *ker_weight,
12287  const uint16_t out_tensor_ch,
12288  const uint16_t ker_dim_x,
12289  const uint16_t ker_dim_y,
12290  const uint16_t pad_x,
12291  const uint16_t pad_y,
12292  const uint16_t stride_x,
12293  const uint16_t stride_y,
12294  const uint16_t pre_rshift,
12295  const uint16_t out_scale,
12296  const uint16_t post_rshift,
12297  q7_t *out_tensor,
12298  const uint16_t out_tensor_dim_x,
12299  const uint16_t out_tensor_dim_y,
12300  q15_t *in_tmp_buf)
12301 {
12302 #if defined(__zcc__)
12303  return tpt_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12304  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12305  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12306  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12307  out_tensor_dim_y, in_tmp_buf);
12308 #else
12309  return riscv_nn_conv_dw_HWC_s8_s8_s8_sym_any(
12310  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12311  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12312  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12313  out_tensor_dim_y, in_tmp_buf);
12314 #endif
12315 }
12316 
12353 static inline int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor,
12354  const uint16_t in_tensor_dim_x,
12355  const uint16_t in_tensor_dim_y,
12356  const uint16_t in_tensor_ch,
12357  const q7_t *ker_weight,
12358  const uint16_t out_tensor_ch,
12359  const uint16_t ker_dim_x,
12360  const uint16_t ker_dim_y,
12361  const uint16_t pad_x,
12362  const uint16_t pad_y,
12363  const uint16_t stride_x,
12364  const uint16_t stride_y,
12365  const uint16_t pre_rshift,
12366  const uint16_t out_scale,
12367  const uint16_t post_rshift,
12368  q15_t *out_tensor,
12369  const uint16_t out_tensor_dim_x,
12370  const uint16_t out_tensor_dim_y,
12371  q15_t *in_tmp_buf)
12372 {
12373 #if defined(__zcc__)
12374  return tpt_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12375  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12376  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12377  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12378  out_tensor_dim_y, in_tmp_buf);
12379 #else
12380  return riscv_nn_conv_dw_HWC_s8_s16_s8_sym_any(
12381  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12382  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12383  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12384  out_tensor_dim_y, in_tmp_buf);
12385 #endif
12386 }
12387 
12423 static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor,
12424  const uint16_t in_tensor_dim_x,
12425  const uint16_t in_tensor_dim_y,
12426  const uint16_t in_tensor_ch,
12427  const q7_t *ker_weight,
12428  const uint16_t out_tensor_ch,
12429  const uint16_t ker_dim_x,
12430  const uint16_t ker_dim_y,
12431  const uint16_t pad_x,
12432  const uint16_t pad_y,
12433  const uint16_t stride_x,
12434  const uint16_t stride_y,
12435  const uint16_t pre_rshift,
12436  const uint16_t out_scale,
12437  const uint16_t post_rshift,
12438  u8_t *out_tensor,
12439  const uint16_t out_tensor_dim_x,
12440  const uint16_t out_tensor_dim_y,
12441  q15_t *in_tmp_buf)
12442 {
12443 #if defined(__zcc__)
12444  return tpt_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12445  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12446  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12447  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12448  out_tensor_dim_y, in_tmp_buf);
12449 #else
12450  return riscv_nn_conv_dw_HWC_u8_u8_s8_sym_any(
12451  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12452  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12453  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12454  out_tensor_dim_y, in_tmp_buf);
12455 #endif
12456 }
12457 
12494 static inline int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor,
12495  const uint16_t in_tensor_dim_x,
12496  const uint16_t in_tensor_dim_y,
12497  const uint16_t in_tensor_ch,
12498  const q7_t *ker_weight,
12499  const uint16_t out_tensor_ch,
12500  const uint16_t ker_dim_x,
12501  const uint16_t ker_dim_y,
12502  const uint16_t pad_x,
12503  const uint16_t pad_y,
12504  const uint16_t stride_x,
12505  const uint16_t stride_y,
12506  const uint16_t pre_rshift,
12507  const uint16_t out_scale,
12508  const uint16_t post_rshift,
12509  q7_t *out_tensor,
12510  const uint16_t out_tensor_dim_x,
12511  const uint16_t out_tensor_dim_y,
12512  q15_t *in_tmp_buf)
12513 {
12514 #if defined(__zcc__)
12515  return tpt_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12516  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12517  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12518  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12519  out_tensor_dim_y, in_tmp_buf);
12520 #else
12521  return riscv_nn_conv_dw_HWC_u8_s8_s8_sym_any(
12522  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12523  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12524  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12525  out_tensor_dim_y, in_tmp_buf);
12526 #endif
12527 }
12528 
12565 static inline int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor,
12566  const uint16_t in_tensor_dim_x,
12567  const uint16_t in_tensor_dim_y,
12568  const uint16_t in_tensor_ch,
12569  const q7_t *ker_weight,
12570  const uint16_t out_tensor_ch,
12571  const uint16_t ker_dim_x,
12572  const uint16_t ker_dim_y,
12573  const uint16_t pad_x,
12574  const uint16_t pad_y,
12575  const uint16_t stride_x,
12576  const uint16_t stride_y,
12577  const uint16_t pre_rshift,
12578  const uint16_t out_scale,
12579  const uint16_t post_rshift,
12580  q15_t *out_tensor,
12581  const uint16_t out_tensor_dim_x,
12582  const uint16_t out_tensor_dim_y,
12583  q15_t *in_tmp_buf)
12584 {
12585 #if defined(__zcc__)
12586  return tpt_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12587  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12588  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12589  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12590  out_tensor_dim_y, in_tmp_buf);
12591 #else
12592  return riscv_nn_conv_dw_HWC_u8_s16_s8_sym_any(
12593  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
12594  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
12595  pre_rshift, out_scale, post_rshift, out_tensor, out_tensor_dim_x,
12596  out_tensor_dim_y, in_tmp_buf);
12597 #endif
12598 }
12599 
12646 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
12647  const uint16_t in_tensor_dim_x,
12648  const uint16_t in_tensor_dim_y,
12649  const uint16_t in_tensor_ch,
12650  const uint16_t in_tensor_group,
12651  const q7_t *ker_weight,
12652  const uint16_t out_tensor_ch,
12653  const uint16_t pad_x,
12654  const uint16_t pad_y,
12655  const uint16_t stride_x,
12656  const uint16_t stride_y,
12657  const int32_t *bias,
12658  q7_t *out_tensor,
12659  const int32_t *out_shift,
12660  const int32_t *out_scale,
12661  const int32_t out_offset,
12662  const int32_t in_offset,
12663  const int32_t act_min,
12664  const int32_t act_max,
12665  const uint16_t out_tensor_dim_x,
12666  const uint16_t out_tensor_dim_y,
12667  q15_t *tmp_buf)
12668 {
12669 #if defined(__zcc__)
12670 
12671  tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
12672  stride_y, pad_x, pad_y, act_min, act_max};
12673 
12674  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12675 
12676  tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12677  in_tensor_group, out_tensor_ch};
12678 
12679  return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12680  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
12681 
12682 #else
12683  return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
12684  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12685  in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
12686  stride_y, bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
12687  act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
12688 #endif
12689 }
12690 
/**
 * @brief Query the temporary-buffer size from the toolchain-specific 1x1 s8
 *        asymmetric convolution backend.
 *
 * Forwards `in_tensor_ch` to
 * `tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size` when `__zcc__` is
 * defined, otherwise to
 * `riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size`.
 *
 * NOTE(review): the line carrying this function's name is absent from the
 * listing this block was taken from; the identifier below is reconstructed
 * from the wrapped riscv_nn routine and the sibling *_get_buffer_size
 * wrappers. Confirm it against the original header.
 *
 * @param[in] in_tensor_ch  value passed through to the backend query
 * @return The value returned by the selected backend query.
 */
static inline int32_t
hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch)
{
    /* A stray "convol" token used to trail this directive, which made the
     * #if expression ill-formed. */
#if defined(__zcc__)
    return tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(
        in_tensor_ch);
#else
    return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch);
#endif
}
12708 
12747 static inline int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
12748  const uint16_t in_tensor_dim_x,
12749  const uint16_t in_tensor_ch,
12750  const uint16_t in_tensor_group,
12751  const q7_t *ker_weight,
12752  const uint16_t out_tensor_ch,
12753  const uint16_t ker_dim_x,
12754  const uint16_t pad_x,
12755  const uint16_t stride_x,
12756  const int32_t *bias,
12757  q7_t *out_tensor,
12758  const int32_t *out_shift,
12759  const int32_t *out_scale,
12760  const int32_t out_offset,
12761  const int32_t in_offset,
12762  const int32_t act_min,
12763  const int32_t act_max,
12764  const uint16_t out_tensor_dim_x,
12765  q15_t *in_tmp_buf)
12766 {
12767 #if defined(__zcc__)
12768 
12769  tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
12770  act_min, act_max};
12771 
12772  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12773 
12774  tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
12775  ker_dim_x, out_tensor_dim_x, out_tensor_ch};
12776 
12777  return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12778  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
12779 
12780 #else
12781  return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
12782  in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
12783  out_tensor_ch, ker_dim_x, pad_x, stride_x, bias, out_tensor, out_shift,
12784  out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12785  in_tmp_buf);
12786 #endif
12787 }
12788 
/**
 * @brief Query the temporary-buffer size from the toolchain-specific 1xn s8
 *        asymmetric convolution backend.
 *
 * The three arguments are forwarded unchanged to
 * `tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size` when `__zcc__`
 * is defined, otherwise to
 * `riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size`.
 *
 * @return The value returned by the selected backend query.
 */
static inline int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    buf_size = riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif

    return buf_size;
}
12810 
12852 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
12853  const uint16_t in_tensor_dim_x,
12854  const uint16_t in_tensor_dim_y,
12855  const uint16_t in_tensor_ch,
12856  const uint16_t in_tensor_group,
12857  const q7_t *ker_weight,
12858  const uint16_t out_tensor_ch,
12859  const uint16_t ker_dim_x,
12860  const uint16_t ker_dim_y,
12861  const uint16_t pad_x,
12862  const uint16_t pad_y,
12863  const uint16_t stride_x,
12864  const uint16_t stride_y,
12865  const int32_t *bias,
12866  q7_t *out_tensor,
12867  const int32_t *out_shift,
12868  const int32_t *out_scale,
12869  const int32_t out_offset,
12870  const int32_t in_offset,
12871  const int32_t act_min,
12872  const int32_t act_max,
12873  const uint16_t out_tensor_dim_x,
12874  const uint16_t out_tensor_dim_y,
12875  q15_t *in_tmp_buf)
12876 {
12877 #if defined(__zcc__)
12878 
12879  tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
12880  in_offset, out_offset, act_min, act_max};
12881 
12882  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
12883 
12884  tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12885  in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
12886  out_tensor_ch};
12887 
12888  return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
12889  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
12890 
12891 #else
12892  return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
12893  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
12894  in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
12895  pad_y, stride_x, stride_y, bias, out_tensor, out_shift, out_scale,
12896  out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
12897  out_tensor_dim_y, in_tmp_buf);
12898 #endif
12899 }
12900 
/**
 * @brief Query the temporary-buffer size from the toolchain-specific s8
 *        asymmetric convolution backend.
 *
 * The three arguments are forwarded unchanged to
 * `tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size` when `__zcc__`
 * is defined, otherwise to
 * `riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size`.
 *
 * @return The value returned by the selected backend query.
 */
static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    buf_size = riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif

    return buf_size;
}
12921 
/**
 * @brief Dispatch to the toolchain-specific
 *        `conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any` routine.
 *
 * All arguments are forwarded unchanged, in declaration order, to the
 * `tpt_nn_*` routine when `__zcc__` is defined and to the `riscv_nn_*`
 * routine otherwise. Parameter meanings are those of the backend routine.
 *
 * @return The value returned by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
    const int8_t *in_tensor,
    const int32_t in_tensor_dim_x,
    const int32_t in_tensor_dim_y,
    const int32_t in_tensor_ch,
    const int8_t *ker_weight,
    const int32_t out_tensor_ch,
    const int32_t pad_x,
    const int32_t pad_y,
    const int32_t stride_x,
    const int32_t stride_y,
    const int32_t *bias,
    int8_t *out_tensor,
    const int32_t *out_shift,
    const int32_t *out_scale,
    const int32_t out_tensor_dim_x,
    const int32_t out_tensor_dim_y,
    const int32_t out_offset,
    const int32_t in_offset,
    const int32_t act_min,
    const int32_t act_max,
    const int32_t dilation_x,
    const int32_t dilation_y,
    int16_t *tmp_buf)
{
    int32_t status;

#ifdef __zcc__
    status = tpt_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, out_tensor_ch,
        pad_x, pad_y, stride_x, stride_y,
        bias, out_tensor, out_shift, out_scale,
        out_tensor_dim_x, out_tensor_dim_y,
        out_offset, in_offset, act_min, act_max,
        dilation_x, dilation_y, tmp_buf);
#else
    status = riscv_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, out_tensor_ch,
        pad_x, pad_y, stride_x, stride_y,
        bias, out_tensor, out_shift, out_scale,
        out_tensor_dim_x, out_tensor_dim_y,
        out_offset, in_offset, act_min, act_max,
        dilation_x, dilation_y, tmp_buf);
#endif

    return status;
}
12999 
13047 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13048  const uint16_t in_tensor_dim_x,
13049  const uint16_t in_tensor_dim_y,
13050  const uint16_t in_tensor_ch,
13051  const q7_t *ker_weight,
13052  const uint16_t out_tensor_ch,
13053  const uint16_t ch_mult,
13054  const uint16_t ker_dim_x,
13055  const uint16_t ker_dim_y,
13056  const uint16_t pad_x,
13057  const uint16_t pad_y,
13058  const uint16_t stride_x,
13059  const uint16_t stride_y,
13060  const int32_t *bias,
13061  q7_t *out_tensor,
13062  const int32_t *out_shift,
13063  const int32_t *out_scale,
13064  const uint16_t out_tensor_dim_x,
13065  const uint16_t out_tensor_dim_y,
13066  const int32_t out_offset,
13067  const int32_t in_offset,
13068  const int32_t act_min,
13069  const int32_t act_max,
13070  const uint16_t dilation_x,
13071  const uint16_t dilation_y,
13072  q15_t *tmp_buf)
13073 {
13074 #if defined(__zcc__)
13075 
13076  tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13077  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13078 
13079  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13080 
13081  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13082  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13083 
13084  return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13085  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13086 
13087 
13088 #else
13089  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13090  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13091  out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13092  stride_y, bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13093  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13094  dilation_y, tmp_buf);
13095 #endif
13096 }
13097 
13142 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
13143  const uint16_t in_tensor_dim_x,
13144  const uint16_t in_tensor_dim_y,
13145  const uint16_t in_tensor_ch,
13146  const q7_t *ker_weight,
13147  const uint16_t out_tensor_ch,
13148  const uint16_t ker_dim_x,
13149  const uint16_t ker_dim_y,
13150  const uint16_t pad_x,
13151  const uint16_t pad_y,
13152  const uint16_t stride_x,
13153  const uint16_t stride_y,
13154  const int32_t *bias,
13155  q7_t *out_tensor,
13156  const int32_t *out_shift,
13157  const int32_t *out_scale,
13158  const uint16_t out_tensor_dim_x,
13159  const uint16_t out_tensor_dim_y,
13160  const int32_t out_offset,
13161  const int32_t in_offset,
13162  const int32_t act_min,
13163  const int32_t act_max,
13164  const uint16_t dilation_x,
13165  const uint16_t dilation_y,
13166  q15_t *in_tmp_buf)
13167 {
13168 #if defined(__zcc__)
13169 
13170  tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13171  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13172 
13173  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13174 
13175  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13176  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13177 
13178  return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13179  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13180 
13181 #else
13182  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13183  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13184  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13185  bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13186  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13187  dilation_y, in_tmp_buf);
13188 #endif
13189 }
13190 
/**
 * @brief Query the temporary-buffer size from the toolchain-specific "fast"
 *        s8 asymmetric `conv_dw` backend.
 *
 * The three arguments are forwarded unchanged to
 * `tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size` when
 * `__zcc__` is defined, otherwise to
 * `riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size`.
 *
 * @return The value returned by the selected backend query.
 */
static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    buf_size = riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif

    return buf_size;
}
13211 
/**
 * @brief Dispatch to the toolchain-specific
 *        `conv_dw_HWC_u8_u8_u8_asym_bias_any` routine.
 *
 * All arguments are forwarded unchanged, in declaration order, to the
 * `tpt_nn_*` routine when `__zcc__` is defined and to the `riscv_nn_*`
 * routine otherwise. Parameter meanings are those of the backend routine.
 * This wrapper takes no temporary buffer, and `out_shift` / `out_scale` are
 * passed as single values rather than pointers.
 *
 * @return The value returned by the selected backend routine.
 */
static inline int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
    const uint8_t *in_tensor,
    const uint16_t in_tensor_dim_x,
    const uint16_t in_tensor_dim_y,
    const uint16_t in_tensor_ch,
    const uint8_t *ker_weight,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y,
    const int16_t ch_mult,
    const int16_t pad_x,
    const int16_t pad_y,
    const int16_t stride_x,
    const int16_t stride_y,
    const int16_t dilation_x,
    const int16_t dilation_y,
    const int32_t *bias,
    const int32_t in_offset,
    const int32_t ker_offset,
    const int32_t out_offset,
    uint8_t *out_tensor,
    const uint16_t out_tensor_dim_x,
    const uint16_t out_tensor_dim_y,
    const int32_t act_min,
    const int32_t act_max,
    const int32_t out_shift,
    const int32_t out_scale)
{
    int32_t status;

#ifdef __zcc__
    status = tpt_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, ker_dim_x, ker_dim_y, ch_mult,
        pad_x, pad_y, stride_x, stride_y, dilation_x, dilation_y,
        bias, in_offset, ker_offset, out_offset,
        out_tensor, out_tensor_dim_x, out_tensor_dim_y,
        act_min, act_max, out_shift, out_scale);
#else
    status = riscv_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(
        in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
        ker_weight, ker_dim_x, ker_dim_y, ch_mult,
        pad_x, pad_y, stride_x, stride_y, dilation_x, dilation_y,
        bias, in_offset, ker_offset, out_offset,
        out_tensor, out_tensor_dim_x, out_tensor_dim_y,
        act_min, act_max, out_shift, out_scale);
#endif

    return status;
}
13294 
13295 #ifdef __riscv_zfh
13331 static inline int32_t hpm_nn_conv_1x1_HWC_f16_f16_f16_bias_any(const float16_t *in_tensor,
13332  const uint16_t in_tensor_dim_x,
13333  const uint16_t in_tensor_dim_y,
13334  const uint16_t in_tensor_ch,
13335  const float16_t *ker_weight,
13336  const uint16_t out_tensor_ch,
13337  const uint16_t ker_dim_x,
13338  const uint16_t ker_dim_y,
13339  const uint16_t pad_x,
13340  const uint16_t pad_y,
13341  const uint16_t stride_x,
13342  const uint16_t stride_y,
13343  const float16_t *bias,
13344  float16_t *out_tensor,
13345  const uint16_t out_tensor_dim_x,
13346  const uint16_t out_tensor_dim_y,
13347  float16_t *in_tmp_buf,
13348  float16_t *tmp_buf)
13349 {
13350 #if defined(__zcc__)
13351  return tpt_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13352  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13353  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13354  bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y, in_tmp_buf,
13355  tmp_buf);
13356 #else
13357  return riscv_nn_conv_1x1_HWC_f16_f16_f16_bias_any(
13358  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13359  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13360  bias, out_tensor, out_tensor_dim_x, out_tensor_dim_y, in_tmp_buf,
13361  tmp_buf);
13362 #endif
13363 }
13364 
13386 static inline int32_t hpm_nn_conv_HWC_f16_f16_f16_bias(const float16_t *in_tensor,
13387  const uint16_t in_tensor_dim,
13388  const uint16_t in_tensor_ch,
13389  const float16_t *ker_weight,
13390  const uint16_t out_tensor_ch,
13391  const uint16_t ker_dim,
13392  const uint16_t pad,
13393  const uint16_t stride,
13394  const float16_t *bias,
13395  float16_t *out_tensor,
13396  const uint16_t out_tensor_dim,
13397  float16_t *in_tmp_buf,
13398  float16_t *tmp_buf)
13399 {
13400 #if defined(__zcc__)
13401  return tpt_nn_conv_HWC_f16_f16_f16_bias(
13402  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13403  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13404  tmp_buf);
13405 #else
13406  return riscv_nn_conv_HWC_f16_f16_f16_bias(
13407  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13408  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13409  tmp_buf);
13410 #endif
13411 }
13412 
13434 static inline int32_t hpm_nn_conv_dw_HWC_f16_f16_f16_bias(const float16_t *in_tensor,
13435  const uint16_t in_tensor_dim,
13436  const uint16_t in_tensor_ch,
13437  const float16_t *ker_weight,
13438  const uint16_t out_tensor_ch,
13439  const uint16_t ker_dim,
13440  const uint16_t pad,
13441  const uint16_t stride,
13442  const float16_t *bias,
13443  float16_t *out_tensor,
13444  const uint16_t out_tensor_dim,
13445  float16_t *in_tmp_buf,
13446  float16_t *tmp_buf)
13447 {
13448 #if defined(__zcc__)
13449  return tpt_nn_conv_dw_HWC_f16_f16_f16_bias(
13450  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13451  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13452  tmp_buf);
13453 #else
13454  return riscv_nn_conv_dw_HWC_f16_f16_f16_bias(
13455  in_tensor, in_tensor_dim, in_tensor_ch, ker_weight, out_tensor_ch,
13456  ker_dim, pad, stride, bias, out_tensor, out_tensor_dim, in_tmp_buf,
13457  tmp_buf);
13458 #endif
13459 }
13460 #endif
13461 
13466 #endif
13467 
13468 #ifdef HPM_EN_MATH_NN_RVP32_LIB
13469 #if defined(__zcc__)
13470 #include "tpt_nn_convolution.h"
13471 #else
13472 #include "riscv_nn_convolution.h"
13473 #endif
13474 
13516 static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13517  const uint16_t in_tensor_dim_x,
13518  const uint16_t in_tensor_dim_y,
13519  const uint16_t in_tensor_ch,
13520  const uint16_t in_tensor_group,
13521  const q7_t *ker_weight,
13522  const uint16_t out_tensor_ch,
13523  const uint16_t ker_dim_x,
13524  const uint16_t ker_dim_y,
13525  const uint16_t pad_x,
13526  const uint16_t pad_y,
13527  const uint16_t stride_x,
13528  const uint16_t stride_y,
13529  const int32_t *bias,
13530  q7_t *out_tensor,
13531  const int32_t *out_shift,
13532  const int32_t *out_scale,
13533  const int32_t out_offset,
13534  const int32_t in_offset,
13535  const int32_t act_min,
13536  const int32_t act_max,
13537  const uint16_t out_tensor_dim_x,
13538  const uint16_t out_tensor_dim_y,
13539  q15_t *in_tmp_buf)
13540 {
13541 #if defined(__zcc__)
13542 
13543  tpt_nn_conv_asym_params aConv_params = {stride_x, stride_y, pad_x, pad_y,
13544  in_offset, out_offset, act_min, act_max};
13545 
13546  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13547 
13548  tpt_nn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13549  in_tensor_group, ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y,
13550  out_tensor_ch};
13551 
13552  return tpt_convolve_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13553  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13554 
13555 #else
13556  return riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any(
13557  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13558  in_tensor_group, ker_weight, out_tensor_ch, ker_dim_x, ker_dim_y, pad_x,
13559  pad_y, stride_x, stride_y, bias, out_tensor, out_shift, out_scale,
13560  out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13561  out_tensor_dim_y, in_tmp_buf);
13562 #endif
13563 }
13564 
13611 static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
13612  const uint16_t in_tensor_dim_x,
13613  const uint16_t in_tensor_dim_y,
13614  const uint16_t in_tensor_ch,
13615  const uint16_t in_tensor_group,
13616  const q7_t *ker_weight,
13617  const uint16_t out_tensor_ch,
13618  const uint16_t pad_x,
13619  const uint16_t pad_y,
13620  const uint16_t stride_x,
13621  const uint16_t stride_y,
13622  const int32_t *bias,
13623  q7_t *out_tensor,
13624  const int32_t *out_shift,
13625  const int32_t *out_scale,
13626  const int32_t out_offset,
13627  const int32_t in_offset,
13628  const int32_t act_min,
13629  const int32_t act_max,
13630  const uint16_t out_tensor_dim_x,
13631  const uint16_t out_tensor_dim_y,
13632  q15_t *tmp_buf)
13633 {
13634 #if defined(__zcc__)
13635 
13636  tpt_nn_conv_1x1_asym_params aConv_params = {in_offset, out_offset, stride_x,
13637  stride_y, pad_x, pad_y, act_min, act_max};
13638 
13639  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13640 
13641  tpt_nn_1x1_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13642  in_tensor_group, out_tensor_ch};
13643 
13644  return tpt_convolve_1x1_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13645  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13646 
13647 #else
13648  return riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(
13649  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13650  in_tensor_group, ker_weight, out_tensor_ch, pad_x, pad_y, stride_x,
13651  stride_y, bias, out_tensor, out_shift, out_scale, out_offset, in_offset,
13652  act_min, act_max, out_tensor_dim_x, out_tensor_dim_y, tmp_buf);
13653 #endif
13654 }
13655 
13703 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13704  const uint16_t in_tensor_dim_x,
13705  const uint16_t in_tensor_dim_y,
13706  const uint16_t in_tensor_ch,
13707  const q7_t *ker_weight,
13708  const uint16_t out_tensor_ch,
13709  const uint16_t ch_mult,
13710  const uint16_t ker_dim_x,
13711  const uint16_t ker_dim_y,
13712  const uint16_t pad_x,
13713  const uint16_t pad_y,
13714  const uint16_t stride_x,
13715  const uint16_t stride_y,
13716  const int32_t *bias,
13717  q7_t *out_tensor,
13718  const int32_t *out_shift,
13719  const int32_t *out_scale,
13720  const uint16_t out_tensor_dim_x,
13721  const uint16_t out_tensor_dim_y,
13722  const int32_t out_offset,
13723  const int32_t in_offset,
13724  const int32_t act_min,
13725  const int32_t act_max,
13726  const uint16_t dilation_x,
13727  const uint16_t dilation_y,
13728  q15_t *tmp_buf)
13729 {
13730 #if defined(__zcc__)
13731 
13732  tpt_nn_dw_conv_asym_params aConv_params = {in_offset, out_offset, ch_mult,
13733  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13734 
13735  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13736 
13737  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13738  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13739 
13740  return tpt_depthwise_conv_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13741  bias, &aConv_params, &aQuant_params, &aConv_dims, tmp_buf);
13742 
13743 #else
13744  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(
13745  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13746  out_tensor_ch, ch_mult, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x,
13747  stride_y, bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13748  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13749  dilation_y, tmp_buf);
13750 #endif
13751 }
13752 
13792 static inline int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor,
13793  const uint16_t in_tensor_dim_x,
13794  const uint16_t in_tensor_ch,
13795  const uint16_t in_tensor_group,
13796  const q7_t *ker_weight,
13797  const uint16_t out_tensor_ch,
13798  const uint16_t ker_dim_x,
13799  const uint16_t pad_x,
13800  const uint16_t stride_x,
13801  const int32_t *bias,
13802  q7_t *out_tensor,
13803  const int32_t *out_shift,
13804  const int32_t *out_scale,
13805  const int32_t out_offset,
13806  const int32_t in_offset,
13807  const int32_t act_min,
13808  const int32_t act_max,
13809  const uint16_t out_tensor_dim_x,
13810  q15_t *in_tmp_buf)
13811 {
13812 #if defined(__zcc__)
13813 
13814  tpt_nn_conv_1xn_asym_params aConv_params = {in_offset, out_offset, stride_x, pad_x,
13815  act_min, act_max};
13816 
13817  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13818 
13819  tpt_nn_1xn_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_ch, in_tensor_group,
13820  ker_dim_x, out_tensor_dim_x, out_tensor_ch};
13821 
13822  return tpt_convolve_1xn_s8_s8_s8_asym_bias_any(out_tensor, in_tensor, ker_weight,
13823  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13824 
13825 #else
13826  return riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(
13827  in_tensor, in_tensor_dim_x, in_tensor_ch, in_tensor_group, ker_weight,
13828  out_tensor_ch, ker_dim_x, pad_x, stride_x, bias, out_tensor, out_shift,
13829  out_scale, out_offset, in_offset, act_min, act_max, out_tensor_dim_x,
13830  in_tmp_buf);
13831 #endif
13832 }
13833 
13878 static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor,
13879  const uint16_t in_tensor_dim_x,
13880  const uint16_t in_tensor_dim_y,
13881  const uint16_t in_tensor_ch,
13882  const q7_t *ker_weight,
13883  const uint16_t out_tensor_ch,
13884  const uint16_t ker_dim_x,
13885  const uint16_t ker_dim_y,
13886  const uint16_t pad_x,
13887  const uint16_t pad_y,
13888  const uint16_t stride_x,
13889  const uint16_t stride_y,
13890  const int32_t *bias,
13891  q7_t *out_tensor,
13892  const int32_t *out_shift,
13893  const int32_t *out_scale,
13894  const uint16_t out_tensor_dim_x,
13895  const uint16_t out_tensor_dim_y,
13896  const int32_t out_offset,
13897  const int32_t in_offset,
13898  const int32_t act_min,
13899  const int32_t act_max,
13900  const uint16_t dilation_x,
13901  const uint16_t dilation_y,
13902  q15_t *in_tmp_buf)
13903 {
13904 #if defined(__zcc__)
13905 
13906  tpt_nn_dw_conv_asym_fast_params aConv_params = {in_offset, out_offset,
13907  stride_x, stride_y, pad_x, pad_y, dilation_x, dilation_y, act_min, act_max};
13908 
13909  tpt_nn_per_channel_quant_params aQuant_params = {out_scale, out_shift};
13910 
13911  tpt_nn_dw_conv_asym_dims aConv_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
13912  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y, out_tensor_ch};
13913 
13914  return tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any(out_tensor, in_tensor, ker_weight,
13915  bias, &aConv_params, &aQuant_params, &aConv_dims, in_tmp_buf);
13916 
13917 #else
13918  return riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(
13919  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_weight,
13920  out_tensor_ch, ker_dim_x, ker_dim_y, pad_x, pad_y, stride_x, stride_y,
13921  bias, out_tensor, out_shift, out_scale, out_tensor_dim_x,
13922  out_tensor_dim_y, out_offset, in_offset, act_min, act_max, dilation_x,
13923  dilation_y, in_tmp_buf);
13924 #endif
13925 }
13926 
/**
 * @brief Query the buffer size for the 1x1 "fast any" convolution wrapper.
 *
 * Forwards in_tensor_ch to
 * tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size() when __zcc__ is
 * defined, otherwise to
 * riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size().
 *
 * @return Value returned by the selected backend function.
 */
static inline int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_convolve_1x1_s8_s8_s8_asym_bias_any_get_buf_size(in_tensor_ch);
#else
    buf_size = riscv_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(in_tensor_ch);
#endif

    return buf_size;
}
13943 
/**
 * @brief Query the buffer size for the depthwise "fast any" convolution
 *        wrapper.
 *
 * Forwards the three arguments unchanged to
 * tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size() when
 * __zcc__ is defined, otherwise to
 * riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size().
 *
 * @return Value returned by the selected backend function.
 */
static inline int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_depthwise_conv_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    buf_size = riscv_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif

    return buf_size;
}
13964 
/**
 * @brief Query the buffer size for the 1xN "any" convolution wrapper.
 *
 * Forwards the three arguments unchanged to
 * tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size() when __zcc__
 * is defined, otherwise to
 * riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size().
 *
 * @return Value returned by the selected backend function.
 */
static inline int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_convolve_1xn_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    buf_size = riscv_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif

    return buf_size;
}
13986 
/**
 * @brief Query the buffer size for the generic HWC "any" convolution wrapper.
 *
 * Forwards the three arguments unchanged to
 * tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size() when __zcc__ is
 * defined, otherwise to
 * riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size().
 *
 * @return Value returned by the selected backend function.
 */
static inline int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
    const uint16_t in_tensor_ch,
    const uint16_t ker_dim_x,
    const uint16_t ker_dim_y)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#else
    buf_size = riscv_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(
        in_tensor_ch, ker_dim_x, ker_dim_y);
#endif

    return buf_size;
}
14008 
14009 #endif
14010 
14011 #endif
14012 
14013 #ifdef HPM_MATH_NN_CONNECTED
14014 #ifdef HPM_EN_MATH_NN_LIB
14015 #if defined(__zcc__)
14016 #include "tpt_nn_fully_connected.h"
14017 #else
14018 #include "riscv_nn_fully_connected.h"
14019 #endif
14020 
14064 static inline int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec,
14065  const q7_t *wt_mat,
14066  const uint16_t size,
14067  const uint16_t wt_row_num,
14068  const uint16_t bias_lshift,
14069  const uint16_t out_rshift,
14070  const q7_t *bias,
14071  q7_t *out_vec,
14072  q15_t *in_tmp_buf)
14073 #if defined(__zcc__)
14074  return tpt_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
14076  in_tmp_buf);
14077 #else
14078  return riscv_nn_fc_s8_s8_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
14081 #endif
14082 }
14083 
14105 static inline int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec,
14106  const q7_t *wt_mat,
14107  const uint16_t size,
14108  const uint16_t wt_row_num,
14109  const uint16_t bias_lshift,
14110  const uint16_t out_rshift,
14111  const q7_t *bias,
14112  q7_t *out_vec,
14113  q15_t *in_tmp_buf)
14114 {
14115 #if defined(__zcc__)
14116  return tpt_nn_fc_s8_s8_s8_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14118  out_vec, in_tmp_buf);
14119 #else
14120  return riscv_nn_fc_s8_s8_s8_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14122  out_vec, in_tmp_buf);
14123 #endif
14124 }
14125 
14140 static inline int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec,
14141  const q15_t *wt_mat,
14142  const uint16_t size,
14143  const uint16_t wt_row_num,
14144  const uint16_t bias_lshift,
14145  const uint16_t out_rshift,
14146  const q15_t *bias,
14147  q15_t *out_vec,
14148  q15_t *tmp_buf)
14149 {
14150 #if defined(__zcc__)
14151  return tpt_nn_fc_s16_s16_s16_sft_bias(in_vec, wt_mat, size, wt_row_num,
14153  tmp_buf);
14154 #else
14155  return riscv_nn_fc_s16_s16_s16_sft_bias(in_vec, wt_mat, size, wt_row_num,
14157  out_vec, tmp_buf);
14158 #endif
14159 }
14160 
14183 static inline int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec,
14184  const q15_t *wt_mat,
14185  const uint16_t size,
14186  const uint16_t wt_row_num,
14187  const uint16_t bias_lshift,
14188  const uint16_t out_rshift,
14189  const q15_t *bias,
14190  q15_t *out_vec,
14191  q15_t *in_tmp_buf)
14192 {
14193 #if defined(__zcc__)
14194  return tpt_nn_fc_s16_s16_s16_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14196  out_vec, in_tmp_buf);
14197 #else
14198  return riscv_nn_fc_s16_s16_s16_sft_bias_fast(in_vec, wt_mat, size, wt_row_num,
14200  out_vec, in_tmp_buf);
14201 #endif
14202 }
14203 
14219 static inline int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec,
14220  const q7_t *wt_mat,
14221  const uint16_t size,
14222  const uint16_t wt_row_num,
14223  const uint16_t bias_lshift,
14224  const uint16_t out_rshift,
14225  const q7_t *bias,
14226  q15_t *out_vec,
14227  q15_t *tmp_buf)
14228 {
14229 #if defined(__zcc__)
14230  return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias(in_vec, wt_mat, size, wt_row_num,
14232  out_vec, tmp_buf);
14233 #else
14234  return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias(
14236  tmp_buf);
14237 #endif
14238 }
14239 
14261 static inline int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec,
14262  const q7_t *wt_mat,
14263  const uint16_t size,
14264  const uint16_t wt_row_num,
14265  const uint16_t bias_lshift,
14266  const uint16_t out_rshift,
14267  const q7_t *bias,
14268  q15_t *out_vec,
14269  q15_t *tmp_buf)
14270 {
14271 #if defined(__zcc__)
14272  return tpt_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14274  tmp_buf);
14275 #else
14276  return riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(
14278  tmp_buf);
14279 #endif
14280 }
14281 
14306 static inline int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec,
14307  const q7_t *wt_mat,
14308  const uint16_t size,
14309  const uint16_t wt_row_num,
14310  const uint16_t pre_rshift,
14311  const uint16_t out_scale,
14312  const uint16_t post_rshift,
14313  const q31_t *bias,
14314  q7_t *out_vec,
14315  q15_t *in_tmp_buf)
14316 {
14317 #if defined(__zcc__)
14318  return tpt_nn_fc_s8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14319  pre_rshift, out_scale, post_rshift, bias,
14320  out_vec, in_tmp_buf);
14321 #else
14322  return riscv_nn_fc_s8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14323  pre_rshift, out_scale, post_rshift, bias,
14324  out_vec, in_tmp_buf);
14325 #endif
14326 }
14327 
14352 static inline int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec,
14353  const q7_t *wt_mat,
14354  const uint16_t size,
14355  const uint16_t wt_row_num,
14356  const uint16_t pre_rshift,
14357  const uint16_t out_scale,
14358  const uint16_t post_rshift,
14359  const q31_t *bias,
14360  q15_t *out_vec,
14361  q15_t *in_tmp_buf)
14362 {
14363 #if defined(__zcc__)
14364  return tpt_nn_fc_s8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14365  pre_rshift, out_scale, post_rshift, bias,
14366  out_vec, in_tmp_buf);
14367 #else
14368  return riscv_nn_fc_s8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14369  pre_rshift, out_scale, post_rshift,
14370  bias, out_vec, in_tmp_buf);
14371 #endif
14372 }
14373 
14398 static inline int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec,
14399  const q7_t *wt_mat,
14400  const uint16_t size,
14401  const uint16_t wt_row_num,
14402  const uint16_t pre_rshift,
14403  const uint16_t out_scale,
14404  const uint16_t post_rshift,
14405  const q31_t *bias,
14406  u8_t *out_vec,
14407  q15_t *in_tmp_buf)
14408 {
14409 #if defined(__zcc__)
14410  return tpt_nn_fc_u8_u8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14411  pre_rshift, out_scale, post_rshift, bias,
14412  out_vec, in_tmp_buf);
14413 #else
14414  return riscv_nn_fc_u8_u8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14415  pre_rshift, out_scale, post_rshift, bias,
14416  out_vec, in_tmp_buf);
14417 #endif
14418 }
14419 
14444 static inline int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec,
14445  const q7_t *wt_mat,
14446  const uint16_t size,
14447  const uint16_t wt_row_num,
14448  const uint16_t pre_rshift,
14449  const uint16_t out_scale,
14450  const uint16_t post_rshift,
14451  const q31_t *bias,
14452  q7_t *out_vec,
14453  q15_t *in_tmp_buf)
14454 {
14455 #if defined(__zcc__)
14456  return tpt_nn_fc_u8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14457  pre_rshift, out_scale, post_rshift, bias,
14458  out_vec, in_tmp_buf);
14459 #else
14460  return riscv_nn_fc_u8_s8_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14461  pre_rshift, out_scale, post_rshift, bias,
14462  out_vec, in_tmp_buf);
14463 #endif
14464 }
14465 
14490 static inline int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec,
14491  const q7_t *wt_mat,
14492  const uint16_t size,
14493  const uint16_t wt_row_num,
14494  const uint16_t pre_rshift,
14495  const uint16_t out_scale,
14496  const uint16_t post_rshift,
14497  const q31_t *bias,
14498  q15_t *out_vec,
14499  q15_t *in_tmp_buf)
14500 {
14501 #if defined(__zcc__)
14502  return tpt_nn_fc_u8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14503  pre_rshift, out_scale, post_rshift, bias,
14504  out_vec, in_tmp_buf);
14505 #else
14506  return riscv_nn_fc_u8_s16_s8_sym_bias(in_vec, wt_mat, size, wt_row_num,
14507  pre_rshift, out_scale, post_rshift,
14508  bias, out_vec, in_tmp_buf);
14509 #endif
14510 }
14511 
14535 static inline int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec,
14536  const q7_t *wt_mat,
14537  const uint16_t size,
14538  const uint16_t wt_row_num,
14539  const uint16_t pre_rshift,
14540  const uint16_t out_scale,
14541  const uint16_t post_rshift,
14542  q7_t *out_vec,
14543  q15_t *in_tmp_buf)
14544 {
14545 #if defined(__zcc__)
14546  return tpt_nn_fc_s8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14547  out_scale, post_rshift, out_vec, in_tmp_buf);
14548 #else
14549  return riscv_nn_fc_s8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14550  out_scale, post_rshift, out_vec, in_tmp_buf);
14551 #endif
14552 }
14553 
14577 static inline int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec,
14578  const q7_t *wt_mat,
14579  const uint16_t size,
14580  const uint16_t wt_row_num,
14581  const uint16_t pre_rshift,
14582  const uint16_t out_scale,
14583  const uint16_t post_rshift,
14584  q15_t *out_vec,
14585  q15_t *in_tmp_buf)
14586 {
14587 #if defined(__zcc__)
14588  return tpt_nn_fc_s8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14589  out_scale, post_rshift, out_vec, in_tmp_buf);
14590 #else
14591  return riscv_nn_fc_s8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14592  out_scale, post_rshift, out_vec, in_tmp_buf);
14593 #endif
14594 }
14595 
14619 static inline int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec,
14620  const q7_t *wt_mat,
14621  const uint16_t size,
14622  const uint16_t wt_row_num,
14623  const uint16_t pre_rshift,
14624  const uint16_t out_scale,
14625  const uint16_t post_rshift,
14626  u8_t *out_vec,
14627  q15_t *in_tmp_buf)
14628 {
14629 #if defined(__zcc__)
14630  return tpt_nn_fc_u8_u8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14631  out_scale, post_rshift, out_vec, in_tmp_buf);
14632 #else
14633  return riscv_nn_fc_u8_u8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14634  out_scale, post_rshift, out_vec, in_tmp_buf);
14635 #endif
14636 }
14637 
14661 static inline int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec,
14662  const q7_t *wt_mat,
14663  const uint16_t size,
14664  const uint16_t wt_row_num,
14665  const uint16_t pre_rshift,
14666  const uint16_t out_scale,
14667  const uint16_t post_rshift,
14668  q7_t *out_vec,
14669  q15_t *in_tmp_buf)
14670 {
14671 #if defined(__zcc__)
14672  return tpt_nn_fc_u8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14673  out_scale, post_rshift, out_vec, in_tmp_buf);
14674 #else
14675  return riscv_nn_fc_u8_s8_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14676  out_scale, post_rshift, out_vec, in_tmp_buf);
14677 #endif
14678 }
14679 
14703 static inline int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec,
14704  const q7_t *wt_mat,
14705  const uint16_t size,
14706  const uint16_t wt_row_num,
14707  const uint16_t pre_rshift,
14708  const uint16_t out_scale,
14709  const uint16_t post_rshift,
14710  q15_t *out_vec,
14711  q15_t *in_tmp_buf)
14712 {
14713 #if defined(__zcc__)
14714  return tpt_nn_fc_u8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14715  out_scale, post_rshift, out_vec, in_tmp_buf);
14716 #else
14717  return riscv_nn_fc_u8_s16_s8_sym(in_vec, wt_mat, size, wt_row_num, pre_rshift,
14718  out_scale, post_rshift, out_vec, in_tmp_buf);
14719 #endif
14720 }
14721 
14746 static inline int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec,
14747  const q7_t *wt_mat,
14748  const uint16_t size,
14749  const uint16_t wt_row_num,
14750  const uint16_t pre_rshift,
14751  const uint16_t out_scale,
14752  const uint16_t post_rshift,
14753  const q31_t *bias,
14754  q7_t *out_vec,
14755  q15_t *in_tmp_buf)
14756 {
14757 #if defined(__zcc__)
14758  return tpt_nn_fc_s8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14759  pre_rshift, out_scale, post_rshift,
14760  bias, out_vec, in_tmp_buf);
14761 #else
14762  return riscv_nn_fc_s8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14763  pre_rshift, out_scale, post_rshift,
14764  bias, out_vec, in_tmp_buf);
14765 #endif
14766 }
14767 
14793 static inline int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec,
14794  const q7_t *wt_mat,
14795  const uint16_t size,
14796  const uint16_t wt_row_num,
14797  const uint16_t pre_rshift,
14798  const uint16_t out_scale,
14799  const uint16_t post_rshift,
14800  const q31_t *bias,
14801  q15_t *out_vec,
14802  q15_t *in_tmp_buf)
14803 {
14804 #if defined(__zcc__)
14805  return tpt_nn_fc_s8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14806  pre_rshift, out_scale, post_rshift,
14807  bias, out_vec, in_tmp_buf);
14808 #else
14809  return riscv_nn_fc_s8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14810  pre_rshift, out_scale, post_rshift,
14811  bias, out_vec, in_tmp_buf);
14812 #endif
14813 }
14814 
14839 static inline int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec,
14840  const q7_t *wt_mat,
14841  const uint16_t size,
14842  const uint16_t wt_row_num,
14843  const uint16_t pre_rshift,
14844  const uint16_t out_scale,
14845  const uint16_t post_rshift,
14846  const q31_t *bias,
14847  u8_t *out_vec,
14848  q15_t *in_tmp_buf)
14849 {
14850 #if defined(__zcc__)
14851  return tpt_nn_fc_u8_u8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14852  pre_rshift, out_scale, post_rshift,
14853  bias, out_vec, in_tmp_buf);
14854 #else
14855  return riscv_nn_fc_u8_u8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14856  pre_rshift, out_scale, post_rshift,
14857  bias, out_vec, in_tmp_buf);
14858 #endif
14859 }
14860 
14886 static inline int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec,
14887  const q7_t *wt_mat,
14888  const uint16_t size,
14889  const uint16_t wt_row_num,
14890  const uint16_t pre_rshift,
14891  const uint16_t out_scale,
14892  const uint16_t post_rshift,
14893  const q31_t *bias,
14894  q7_t *out_vec,
14895  q15_t *in_tmp_buf)
14896 {
14897 #if defined(__zcc__)
14898  return tpt_nn_fc_u8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14899  pre_rshift, out_scale, post_rshift,
14900  bias, out_vec, in_tmp_buf);
14901 #else
14902  return riscv_nn_fc_u8_s8_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14903  pre_rshift, out_scale, post_rshift,
14904  bias, out_vec, in_tmp_buf);
14905 #endif
14906 }
14907 
14933 static inline int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec,
14934  const q7_t *wt_mat,
14935  const uint16_t size,
14936  const uint16_t wt_row_num,
14937  const uint16_t pre_rshift,
14938  const uint16_t out_scale,
14939  const uint16_t post_rshift,
14940  const q31_t *bias,
14941  q15_t *out_vec,
14942  q15_t *in_tmp_buf)
14943 {
14944 #if defined(__zcc__)
14945  return tpt_nn_fc_u8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14946  pre_rshift, out_scale, post_rshift,
14947  bias, out_vec, in_tmp_buf);
14948 #else
14949  return riscv_nn_fc_u8_s16_s8_sym_bias_fast(in_vec, wt_mat, size, wt_row_num,
14950  pre_rshift, out_scale, post_rshift,
14951  bias, out_vec, in_tmp_buf);
14952 #endif
14953 }
14954 
14978 static inline int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec,
14979  const q7_t *wt_mat,
14980  const uint16_t size,
14981  const uint16_t wt_row_num,
14982  const uint16_t pre_rshift,
14983  const uint16_t out_scale,
14984  const uint16_t post_rshift,
14985  q7_t *out_vec,
14986  q15_t *in_tmp_buf)
14987 {
14988 #if defined(__zcc__)
14989  return tpt_nn_fc_s8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
14990  pre_rshift, out_scale, post_rshift,
14991  out_vec, in_tmp_buf);
14992 #else
14993  return riscv_nn_fc_s8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
14994  pre_rshift, out_scale, post_rshift,
14995  out_vec, in_tmp_buf);
14996 #endif
14997 }
14998 
15023 static inline int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec,
15024  const q7_t *wt_mat,
15025  const uint16_t size,
15026  const uint16_t wt_row_num,
15027  const uint16_t pre_rshift,
15028  const uint16_t out_scale,
15029  const uint16_t post_rshift,
15030  q15_t *out_vec,
15031  q15_t *in_tmp_buf)
15032 {
15033 #if defined(__zcc__)
15034  return tpt_nn_fc_s8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15035  pre_rshift, out_scale, post_rshift,
15036  out_vec, in_tmp_buf);
15037 #else
15038  return riscv_nn_fc_s8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15039  pre_rshift, out_scale, post_rshift,
15040  out_vec, in_tmp_buf);
15041 #endif
15042 }
15043 
15067 static inline int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec,
15068  const q7_t *wt_mat,
15069  const uint16_t size,
15070  const uint16_t wt_row_num,
15071  const uint16_t pre_rshift,
15072  const uint16_t out_scale,
15073  const uint16_t post_rshift,
15074  u8_t *out_vec,
15075  q15_t *in_tmp_buf)
15076 {
15077 #if defined(__zcc__)
15078  return tpt_nn_fc_u8_u8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15079  pre_rshift, out_scale, post_rshift,
15080  out_vec, in_tmp_buf);
15081 #else
15082  return riscv_nn_fc_u8_u8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15083  pre_rshift, out_scale, post_rshift,
15084  out_vec, in_tmp_buf);
15085 #endif
15086 }
15087 
15112 static inline int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec,
15113  const q7_t *wt_mat,
15114  const uint16_t size,
15115  const uint16_t wt_row_num,
15116  const uint16_t pre_rshift,
15117  const uint16_t out_scale,
15118  const uint16_t post_rshift,
15119  q7_t *out_vec,
15120  q15_t *in_tmp_buf)
15121 {
15122 #if defined(__zcc__)
15123  return tpt_nn_fc_u8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15124  pre_rshift, out_scale, post_rshift,
15125  out_vec, in_tmp_buf);
15126 #else
15127  return riscv_nn_fc_u8_s8_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15128  pre_rshift, out_scale, post_rshift,
15129  out_vec, in_tmp_buf);
15130 #endif
15131 }
15132 
15157 static inline int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec,
15158  const q7_t *wt_mat,
15159  const uint16_t size,
15160  const uint16_t wt_row_num,
15161  const uint16_t pre_rshift,
15162  const uint16_t out_scale,
15163  const uint16_t post_rshift,
15164  q15_t *out_vec,
15165  q15_t *in_tmp_buf)
15166 {
15167 #if defined(__zcc__)
15168  return tpt_nn_fc_u8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15169  pre_rshift, out_scale, post_rshift,
15170  out_vec, in_tmp_buf);
15171 #else
15172  return riscv_nn_fc_u8_s16_s8_sym_fast(in_vec, wt_mat, size, wt_row_num,
15173  pre_rshift, out_scale, post_rshift,
15174  out_vec, in_tmp_buf);
15175 #endif
15176 }
15177 
15188 static inline void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat,
15189  const uint32_t size,
15190  const uint32_t wt_row_num,
15191  q7_t *wt_mat_out)
15192 {
15193 #if defined(__zcc__)
15194  tpt_nn_fc_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15195 #else
15196  riscv_nn_fc_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15197 #endif
15198 }
15199 
15210 static inline void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat,
15211  const uint32_t size,
15212  const uint32_t wt_row_num,
15213  q15_t *wt_mat_out)
15214 {
15215 #if defined(__zcc__)
15216  tpt_nn_fc_s16_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15217 #else
15218  riscv_nn_fc_s16_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15219 #endif
15220 }
15221 
15231 static inline void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat,
15232  const uint32_t size,
15233  const uint32_t wt_row_num,
15234  q7_t *wt_mat_out)
15235 {
15236 #if defined(__zcc__)
15237  tpt_nn_fc_mat_vec_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15238 #else
15239  riscv_nn_fc_mat_vec_s8_wt_converter(wt_mat, size, wt_row_num, wt_mat_out);
15240 #endif
15241 }
15242 
15273 static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec,
15274  const int8_t *wt_mat,
15275  const uint16_t in_vec_col,
15276  const uint16_t wt_mat_row,
15277  const uint16_t in_vec_group,
15278  const int32_t in_offset,
15279  const int32_t wt_offset,
15280  const int32_t out_scale,
15281  const int32_t out_shift,
15282  const int32_t out_offset,
15283  const int32_t *bias,
15284  int8_t *out_vec,
15285  const int32_t act_min,
15286  const int32_t act_max,
15287  q15_t *tmp_buf)
15288 {
15289 #if defined(__zcc__)
15290 
15291  tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15292  out_shift, act_min, act_max};
15293  tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15294 
15295  return tpt_fully_connected_s8(out_vec, in_vec, wt_mat, bias, &aFc_params,
15296  &aFC_dims, tmp_buf);
15297 #else
15298  return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec, wt_mat, in_vec_col, wt_mat_row,
15299  in_vec_group, in_offset, wt_offset,
15300  out_scale, out_shift, out_offset, bias,
15301  out_vec, act_min, act_max, tmp_buf);
15302 #endif
15303 }
15304 
/**
 * @brief Query the buffer size for hpm_nn_fc_s8_s8_s8_asym_bias().
 *
 * Forwards in_vec_col to tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size() when
 * __zcc__ is defined, otherwise to
 * riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size().
 *
 * @return Value returned by the selected backend function.
 */
static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(
    const uint16_t in_vec_col)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#else
    buf_size = riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#endif

    return buf_size;
}
15320 
15325 #endif
15326 
15327 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15328 #if defined(__zcc__)
15329 #include "tpt_nn_fully_connected.h"
15330 #else
15331 #include "riscv_nn_fully_connected.h"
15332 #endif
15333 
15364 static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec,
15365  const int8_t *wt_mat,
15366  const uint16_t in_vec_col,
15367  const uint16_t wt_mat_row,
15368  const uint16_t in_vec_group,
15369  const int32_t in_offset,
15370  const int32_t wt_offset,
15371  const int32_t out_scale,
15372  const int32_t out_shift,
15373  const int32_t out_offset,
15374  const int32_t *bias,
15375  int8_t *out_vec,
15376  const int32_t act_min,
15377  const int32_t act_max,
15378  q15_t *tmp_buf)
15379 {
15380 #if defined(__zcc__)
15381 
15382  tpt_nn_fc_params_asym_s8 aFc_params = {in_offset, wt_offset, out_offset, out_scale,
15383  out_shift, act_min, act_max};
15384  tpt_nn_fc_dims_asym_s8 aFC_dims = {in_vec_col, in_vec_group, wt_mat_row};
15385 
15386  return tpt_fully_connected_s8(out_vec, in_vec, wt_mat, bias, &aFc_params,
15387  &aFC_dims, tmp_buf);
15388 #else
15389  return riscv_nn_fc_s8_s8_s8_asym_bias(in_vec, wt_mat, in_vec_col, wt_mat_row,
15390  in_vec_group, in_offset, wt_offset,
15391  out_scale, out_shift, out_offset, bias,
15392  out_vec, act_min, act_max, tmp_buf);
15393 #endif
15394 }
15395 
/**
 * @brief Query the buffer size for hpm_nn_fc_s8_s8_s8_asym_bias()
 *        (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * Forwards in_vec_col to tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size() when
 * __zcc__ is defined, otherwise to
 * riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size().
 *
 * NOTE(review): a function with this same name is also defined under
 * HPM_EN_MATH_NN_LIB; presumably the two library switches are never enabled
 * together - confirm.
 *
 * @return Value returned by the selected backend function.
 */
static inline int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(
    const uint16_t in_vec_col)
{
    int32_t buf_size;

#ifdef __zcc__
    buf_size = tpt_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#else
    buf_size = riscv_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(in_vec_col);
#endif

    return buf_size;
}
15411 
15412 #endif /* HPM_EN_MATH_NN_RVP32_LIB */
15413 
15414 #endif
15415 
15416 #ifdef HPM_MATH_NN_POOLING
15417 #ifdef HPM_EN_MATH_NN_LIB
15418 #if defined(__zcc__)
15419 #include "tpt_nn_pooling.h"
15420 #else
15421 #include "riscv_nn_pooling.h"
15422 #endif
15423 
15466 static inline void hpm_nn_avepool_HWC_s8(q7_t *in_tensor,
15467  const uint16_t in_tensor_dim,
15468  const uint16_t in_tensor_ch,
15469  const uint16_t ker_dim,
15470  const uint16_t pad,
15471  const uint16_t stride,
15472  const uint16_t out_tensor_dim,
15473  q7_t *in_tmp_buf,
15474  q7_t *out_tensor)
15475 {
15476 #if defined(__zcc__)
15477  tpt_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15478  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15479 #else
15480  riscv_nn_avepool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15481  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15482 #endif
15483 }
15484 
/**
 * @brief Forward an average-pooling request with separate x/y geometry to the
 *        NN backend selected at compile time.
 *
 * Calls tpt_nn_avepool_HWC_s8_any() when __zcc__ is defined, otherwise
 * riscv_nn_avepool_HWC_s8_any(). Both calls receive all fifteen arguments
 * unchanged and in the order declared here. Nothing is returned.
 *
 * @param in_tensor        Forwarded unchanged (non-const pointer).
 * @param in_tensor_dim_x  Forwarded unchanged.
 * @param in_tensor_dim_y  Forwarded unchanged.
 * @param in_tensor_ch     Forwarded unchanged.
 * @param ker_dim_x        Forwarded unchanged.
 * @param ker_dim_y        Forwarded unchanged.
 * @param pad_x            Forwarded unchanged.
 * @param pad_y            Forwarded unchanged.
 * @param stride_x         Forwarded unchanged.
 * @param stride_y         Forwarded unchanged.
 * @param out_tensor_dim_x Forwarded unchanged.
 * @param out_tensor_dim_y Forwarded unchanged.
 * @param in_tmp_buf       Forwarded unchanged.
 * @param out_tensor       Forwarded unchanged.
 * @param out_lshift       Forwarded unchanged.
 *
 * NOTE(review): the meaning of out_lshift (presumably an output left-shift
 * amount) and any size requirement on in_tmp_buf are defined by the backend
 * and are not visible here - confirm.
 */
15531 static inline void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor,
15532  const uint16_t in_tensor_dim_x,
15533  const uint16_t in_tensor_dim_y,
15534  const uint16_t in_tensor_ch,
15535  const uint16_t ker_dim_x,
15536  const uint16_t ker_dim_y,
15537  const uint16_t pad_x,
15538  const uint16_t pad_y,
15539  const uint16_t stride_x,
15540  const uint16_t stride_y,
15541  const uint16_t out_tensor_dim_x,
15542  const uint16_t out_tensor_dim_y,
15543  q7_t *in_tmp_buf,
15544  q7_t *out_tensor,
15545  const uint16_t out_lshift)
15546 {
15547 #if defined(__zcc__)
15548  tpt_nn_avepool_HWC_s8_any(
15549  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15550  ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15551  out_tensor_dim_y, in_tmp_buf, out_tensor, out_lshift);
15552 #else
15553  riscv_nn_avepool_HWC_s8_any(
15554  in_tensor, in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch, ker_dim_x,
15555  ker_dim_y, pad_x, pad_y, stride_x, stride_y, out_tensor_dim_x,
15556  out_tensor_dim_y, in_tmp_buf, out_tensor, out_lshift);
15557 #endif
15558 }
15559 
/**
 * @brief Forward an average-pooling-with-activation-limits request to the NN
 *        backend selected at compile time.
 *
 * When __zcc__ is defined, the scalar arguments are packed positionally into
 * two backend structures and tpt_avgpool_s8_any_act() is called with the
 * output pointer first. Otherwise riscv_nn_avepool_HWC_s8_any_act() receives
 * every argument unchanged and in the order declared here.
 *
 * Note that this wrapper declares y before x for each dimension pair, while
 * the __zcc__ structures are filled with x before y.
 *
 * @param in_tensor_dim_y  Forwarded to the backend.
 * @param in_tensor_dim_x  Forwarded to the backend.
 * @param out_tensor_dim_y Forwarded to the backend.
 * @param out_tensor_dim_x Forwarded to the backend.
 * @param stride_y         Forwarded to the backend.
 * @param stride_x         Forwarded to the backend.
 * @param ker_dim_y        Forwarded to the backend.
 * @param ker_dim_x        Forwarded to the backend.
 * @param pad_y            Forwarded to the backend.
 * @param pad_x            Forwarded to the backend.
 * @param act_min          Forwarded to the backend.
 * @param act_max          Forwarded to the backend.
 * @param in_tensor_ch     Forwarded to the backend.
 * @param in_tensor        Forwarded to the backend (non-const pointer).
 * @param in_tmp_buf       Forwarded to the backend.
 * @param out_tensor       Forwarded to the backend.
 * @return The value returned by the selected backend function.
 *
 * NOTE(review): act_min/act_max presumably clamp the output, and the return
 * value is presumably a status code - neither is visible here; confirm.
 */
15588 static inline int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y,
15589  const int in_tensor_dim_x,
15590  const int out_tensor_dim_y,
15591  const int out_tensor_dim_x,
15592  const int stride_y,
15593  const int stride_x,
15594  const int ker_dim_y,
15595  const int ker_dim_x,
15596  const int pad_y,
15597  const int pad_x,
15598  const int act_min,
15599  const int act_max,
15600  const int in_tensor_ch,
15601  int8_t *in_tensor,
15602  int16_t *in_tmp_buf,
15603  int8_t *out_tensor)
15604 {
15605 #if defined(__zcc__)
15606 
/* NOTE(review): both initialisers are positional; the member order of the
 * tpt_nn_avgpool_*_act_s8 structures is not visible in this file - confirm
 * it matches the value order used below. */
15607  tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
15608  act_min, act_max};
15609  tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
15610  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y};
15611 
/* The tpt entry point takes the output tensor first and the scratch buffer last. */
15612  return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims, in_tmp_buf);
15613 
15614 #else
15615  return riscv_nn_avepool_HWC_s8_any_act(
15616  in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15617  stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15618  in_tensor_ch, in_tensor, in_tmp_buf, out_tensor);
15619 #endif
15620 }
15621 
/**
 * @brief Forward a buffer-size query to the NN backend selected at compile time.
 *
 * Calls tpt_nn_avepool_HWC_s8_any_act_get_buffer_size() when __zcc__ is
 * defined, otherwise riscv_nn_avepool_HWC_s8_any_act_get_buffer_size().
 *
 * @param[in] out_tensor_dim_x Passed to the backend unchanged.
 * @param[in] in_tensor_ch     Passed to the backend unchanged.
 * @return The value returned by the backend, as int32_t.
 *
 * NOTE(review): presumably the size of the in_tmp_buf expected by
 * hpm_nn_avepool_HWC_s8_any_act(); the unit is not visible here - confirm.
 */
15630 static inline int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
15631 {
15632 #if defined(__zcc__)
15633  return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15634  in_tensor_ch);
15635 #else
15636  return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15637  in_tensor_ch);
15638 #endif
15639 }
15640 
/**
 * @brief Forward a max-pooling request to the NN backend selected at compile time.
 *
 * Calls tpt_nn_maxpool_HWC_s8() when __zcc__ is defined, otherwise
 * riscv_nn_maxpool_HWC_s8(). Both calls receive all nine arguments unchanged
 * and in the order declared here. Nothing is returned.
 *
 * @param in_tensor      Forwarded unchanged (non-const pointer).
 * @param in_tensor_dim  Forwarded unchanged.
 * @param in_tensor_ch   Forwarded unchanged.
 * @param ker_dim        Forwarded unchanged.
 * @param pad            Forwarded unchanged.
 * @param stride         Forwarded unchanged.
 * @param out_tensor_dim Forwarded unchanged.
 * @param in_tmp_buf     Forwarded unchanged.
 * @param out_tensor     Forwarded unchanged.
 *
 * NOTE(review): whether the backend overwrites in_tensor or actually uses
 * in_tmp_buf is not visible in this header - confirm with the backend docs.
 */
15670  static inline void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor,
15671  const uint16_t in_tensor_dim,
15672  const uint16_t in_tensor_ch,
15673  const uint16_t ker_dim,
15674  const uint16_t pad,
15675  const uint16_t stride,
15676  const uint16_t out_tensor_dim,
15677  q7_t *in_tmp_buf,
15678  q7_t *out_tensor)
15679 {
15680 #if defined(__zcc__)
15681  tpt_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15682  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15683 #else
15684  riscv_nn_maxpool_HWC_s8(in_tensor, in_tensor_dim, in_tensor_ch, ker_dim, pad,
15685  stride, out_tensor_dim, in_tmp_buf, out_tensor);
15686 #endif
15687 }
15688 
/**
 * @brief Forward a max-pooling-with-activation-limits request to the NN
 *        backend selected at compile time.
 *
 * Calls tpt_nn_maxpool_HWC_s8_any_act() when __zcc__ is defined, otherwise
 * riscv_nn_maxpool_HWC_s8_any_act(). Unlike the average-pooling "_any_act"
 * wrapper, both branches here take the same flat argument list, unchanged and
 * in the order declared.
 *
 * @param in_tensor_dim_y  Forwarded unchanged.
 * @param in_tensor_dim_x  Forwarded unchanged.
 * @param out_tensor_dim_y Forwarded unchanged.
 * @param out_tensor_dim_x Forwarded unchanged.
 * @param stride_y         Forwarded unchanged.
 * @param stride_x         Forwarded unchanged.
 * @param ker_dim_y        Forwarded unchanged.
 * @param ker_dim_x        Forwarded unchanged.
 * @param pad_y            Forwarded unchanged.
 * @param pad_x            Forwarded unchanged.
 * @param act_min          Forwarded unchanged.
 * @param act_max          Forwarded unchanged.
 * @param in_tensor_ch     Forwarded unchanged.
 * @param in_tensor        Forwarded unchanged (non-const pointer).
 * @param tmp_buffer       Forwarded unchanged.
 * @param out_tensor       Forwarded unchanged.
 * @return The value returned by the selected backend function.
 *
 * NOTE(review): the return value is presumably a status code and
 * act_min/act_max presumably clamp the output - not visible here; confirm.
 */
15715 static inline int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y,
15716  const uint16_t in_tensor_dim_x,
15717  const uint16_t out_tensor_dim_y,
15718  const uint16_t out_tensor_dim_x,
15719  const uint16_t stride_y,
15720  const uint16_t stride_x,
15721  const uint16_t ker_dim_y,
15722  const uint16_t ker_dim_x,
15723  const uint16_t pad_y,
15724  const uint16_t pad_x,
15725  const int8_t act_min,
15726  const int8_t act_max,
15727  const uint16_t in_tensor_ch,
15728  int8_t *in_tensor,
15729  int16_t *tmp_buffer,
15730  int8_t *out_tensor)
15731 {
15732 #if defined(__zcc__)
15733  return tpt_nn_maxpool_HWC_s8_any_act(
15734  in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15735  stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15736  in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
15737 #else
15738  return riscv_nn_maxpool_HWC_s8_any_act(
15739  in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15740  stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15741  in_tensor_ch, in_tensor, tmp_buffer, out_tensor);
15742 #endif
15743 }
15744 
15749 #endif
15750 
15751 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15752 #if defined(__zcc__)
15753 #include "tpt_nn_pooling.h"
15754 #else
15755 #include "riscv_nn_pooling.h"
15756 #endif
15757 
/**
 * @brief Forward an average-pooling-with-activation-limits request to the NN
 *        backend selected at compile time (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * When __zcc__ is defined, the scalar arguments are packed positionally into
 * two backend structures and tpt_avgpool_s8_any_act() is called with the
 * output pointer first. Otherwise riscv_nn_avepool_HWC_s8_any_act() receives
 * every argument unchanged and in the order declared here.
 *
 * @param in_tensor_dim_y  Forwarded to the backend.
 * @param in_tensor_dim_x  Forwarded to the backend.
 * @param out_tensor_dim_y Forwarded to the backend.
 * @param out_tensor_dim_x Forwarded to the backend.
 * @param stride_y         Forwarded to the backend.
 * @param stride_x         Forwarded to the backend.
 * @param ker_dim_y        Forwarded to the backend.
 * @param ker_dim_x        Forwarded to the backend.
 * @param pad_y            Forwarded to the backend.
 * @param pad_x            Forwarded to the backend.
 * @param act_min          Forwarded to the backend.
 * @param act_max          Forwarded to the backend.
 * @param in_tensor_ch     Forwarded to the backend.
 * @param in_tensor        Forwarded to the backend (non-const pointer).
 * @param in_tmp_buf       Forwarded to the backend.
 * @param out_tensor       Forwarded to the backend.
 * @return The value returned by the selected backend function.
 *
 * NOTE(review): a function with the same name and signature is also defined
 * under HPM_EN_MATH_NN_LIB in this header; defining both macros would cause a
 * redefinition error - presumably they are mutually exclusive; confirm.
 */
15786 static inline int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y,
15787  const int in_tensor_dim_x,
15788  const int out_tensor_dim_y,
15789  const int out_tensor_dim_x,
15790  const int stride_y,
15791  const int stride_x,
15792  const int ker_dim_y,
15793  const int ker_dim_x,
15794  const int pad_y,
15795  const int pad_x,
15796  const int act_min,
15797  const int act_max,
15798  const int in_tensor_ch,
15799  int8_t *in_tensor,
15800  int16_t *in_tmp_buf,
15801  int8_t *out_tensor)
15802 {
15803 #if defined(__zcc__)
15804 
/* NOTE(review): positional initialisers; the member order of the
 * tpt_nn_avgpool_*_act_s8 structures is not visible in this file - confirm. */
15805  tpt_nn_avgpool_params_act_s8 aPool_params = {stride_x, stride_y, pad_x, pad_y,
15806  act_min, act_max};
15807  tpt_nn_avgpool_dims_act_s8 aPool_dims = {in_tensor_dim_x, in_tensor_dim_y, in_tensor_ch,
15808  ker_dim_x, ker_dim_y, out_tensor_dim_x, out_tensor_dim_y};
15809 
15810  return tpt_avgpool_s8_any_act(out_tensor, in_tensor, &aPool_params, &aPool_dims, in_tmp_buf);
15811 
15812 #else
15813  return riscv_nn_avepool_HWC_s8_any_act(
15814  in_tensor_dim_y, in_tensor_dim_x, out_tensor_dim_y, out_tensor_dim_x,
15815  stride_y, stride_x, ker_dim_y, ker_dim_x, pad_y, pad_x, act_min, act_max,
15816  in_tensor_ch, in_tensor, in_tmp_buf, out_tensor);
15817 #endif
15818 }
15819 
/**
 * @brief Forward a buffer-size query to the NN backend selected at compile
 *        time (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * Calls tpt_nn_avepool_HWC_s8_any_act_get_buffer_size() when __zcc__ is
 * defined, otherwise riscv_nn_avepool_HWC_s8_any_act_get_buffer_size().
 *
 * @param[in] out_tensor_dim_x Passed to the backend unchanged.
 * @param[in] in_tensor_ch     Passed to the backend unchanged.
 * @return The value returned by the backend, as int32_t.
 *
 * NOTE(review): presumably the size of the in_tmp_buf expected by
 * hpm_nn_avepool_HWC_s8_any_act(); the unit is not visible here - confirm.
 */
15828 static inline int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
15829 {
15830 #if defined(__zcc__)
15831  return tpt_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15832  in_tensor_ch);
15833 #else
15834  return riscv_nn_avepool_HWC_s8_any_act_get_buffer_size(out_tensor_dim_x,
15835  in_tensor_ch);
15836 #endif
15837 }
15838 
15839 #endif
15840 #endif
15841 
15842 #ifdef HPM_MATH_NN_SOFTMAX
15843 #ifdef HPM_EN_MATH_NN_LIB
15844 #if defined(__zcc__)
15845 #include "tpt_nn_softmax.h"
15846 #else
15847 #include "riscv_nn_softmax.h"
15848 #endif
15849 
/**
 * @brief Forward a q7 softmax request to the NN backend selected at compile time.
 *
 * Calls tpt_nn_softmax_s8_fast() when __zcc__ is defined, otherwise
 * riscv_nn_softmax_s8_fast(). Both calls receive (in_vec, size, out_vec)
 * unchanged. Nothing is returned.
 *
 * @param[in]  in_vec  Forwarded unchanged (read-only pointer).
 * @param[in]  size    Forwarded unchanged.
 * @param[out] out_vec Forwarded unchanged.
 *
 * NOTE(review): "fast" presumably denotes an approximate implementation, and
 * size is presumably the element count - not visible here; confirm.
 */
15874 static inline void hpm_nn_softmax_s8_fast(const q7_t *in_vec,
15875  const uint16_t size,
15876  q7_t *out_vec)
15877 {
15878 #if defined(__zcc__)
15879  tpt_nn_softmax_s8_fast(in_vec, size, out_vec);
15880 #else
15881  riscv_nn_softmax_s8_fast(in_vec, size, out_vec);
15882 #endif
15883 }
15884 
/**
 * @brief Forward a q15 softmax request to the NN backend selected at compile time.
 *
 * Calls tpt_nn_softmax_s16_fast() when __zcc__ is defined, otherwise
 * riscv_nn_softmax_s16_fast(). Both calls receive (in_vec, size, out_vec)
 * unchanged. Nothing is returned.
 *
 * @param[in]  in_vec  Forwarded unchanged (read-only pointer).
 * @param[in]  size    Forwarded unchanged.
 * @param[out] out_vec Forwarded unchanged.
 *
 * NOTE(review): size is presumably the element count - not visible here; confirm.
 */
15892 static inline void hpm_nn_softmax_s16_fast(const q15_t *in_vec,
15893  const uint16_t size,
15894  q15_t *out_vec)
15895 {
15896 #if defined(__zcc__)
15897  tpt_nn_softmax_s16_fast(in_vec, size, out_vec);
15898 #else
15899  riscv_nn_softmax_s16_fast(in_vec, size, out_vec);
15900 #endif
15901 }
15902 
/**
 * @brief Forward an int8 "hp" softmax request to the NN backend selected at
 *        compile time.
 *
 * When __zcc__ is defined, tpt_softmax_s8_hp() is called with out_tensor
 * moved to the first position. Otherwise riscv_nn_softmax_s8_hp() receives
 * the arguments in the order declared here. Nothing is returned.
 *
 * @param[in]  in_tensor     Forwarded unchanged (read-only pointer).
 * @param[in]  in_tensor_row Forwarded unchanged.
 * @param[in]  in_tensor_col Forwarded unchanged.
 * @param[in]  scale         Forwarded unchanged.
 * @param[in]  lshift        Forwarded unchanged.
 * @param[in]  diff_min      Forwarded unchanged.
 * @param[out] out_tensor    Forwarded unchanged.
 *
 * NOTE(review): scale/lshift/diff_min look like quantisation parameters and
 * "hp" presumably means high precision - not visible here; confirm.
 */
15917 static inline void hpm_nn_softmax_s8_hp(const int8_t *in_tensor,
15918  const int32_t in_tensor_row,
15919  const int32_t in_tensor_col,
15920  const int32_t scale,
15921  const int32_t lshift,
15922  const int32_t diff_min,
15923  int8_t *out_tensor)
15924 {
15925 #if defined(__zcc__)
/* The tpt entry point takes the output tensor first. */
15926  tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15927  diff_min);
15928 #else
15929  riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15930  diff_min, out_tensor);
15931 #endif
15932 }
15933 
/**
 * @brief Forward a uint8 "hp" softmax request to the NN backend selected at
 *        compile time.
 *
 * Calls tpt_nn_softmax_u8_hp() when __zcc__ is defined, otherwise
 * riscv_nn_softmax_u8_hp(). Unlike the s8 variant, both branches take the
 * arguments unchanged and in the order declared here. Nothing is returned.
 *
 * @param[in]  in_tensor     Forwarded unchanged (read-only pointer).
 * @param[in]  in_tensor_row Forwarded unchanged.
 * @param[in]  in_tensor_col Forwarded unchanged.
 * @param[in]  scale         Forwarded unchanged.
 * @param[in]  lshift        Forwarded unchanged.
 * @param[in]  diff_min      Forwarded unchanged.
 * @param[out] out_tensor    Forwarded unchanged.
 *
 * NOTE(review): scale/lshift/diff_min look like quantisation parameters -
 * their exact meaning is defined by the backend; confirm.
 */
15948 static inline void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor,
15949  const int32_t in_tensor_row,
15950  const int32_t in_tensor_col,
15951  const int32_t scale,
15952  const int32_t lshift,
15953  const int32_t diff_min,
15954  uint8_t *out_tensor)
15955 {
15956 #if defined(__zcc__)
15957  tpt_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15958  diff_min, out_tensor);
15959 #else
15960  riscv_nn_softmax_u8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
15961  diff_min, out_tensor);
15962 #endif
15963 }
15964 
15969 #endif
15970 
15971 #ifdef HPM_EN_MATH_NN_RVP32_LIB
15972 #if defined(__zcc__)
15973 #include "tpt_nn_softmax.h"
15974 #else
15975 #include "riscv_nn_softmax.h"
15976 #endif
15977 
/**
 * @brief Forward an int8 "hp" softmax request to the NN backend selected at
 *        compile time (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * When __zcc__ is defined, tpt_softmax_s8_hp() is called with out_tensor
 * moved to the first position. Otherwise riscv_nn_softmax_s8_hp() receives
 * the arguments in the order declared here. Nothing is returned.
 *
 * @param[in]  in_tensor     Forwarded unchanged (read-only pointer).
 * @param[in]  in_tensor_row Forwarded unchanged.
 * @param[in]  in_tensor_col Forwarded unchanged.
 * @param[in]  scale         Forwarded unchanged.
 * @param[in]  lshift        Forwarded unchanged.
 * @param[in]  diff_min      Forwarded unchanged.
 * @param[out] out_tensor    Forwarded unchanged.
 *
 * NOTE(review): a function with the same name and signature is also defined
 * under HPM_EN_MATH_NN_LIB in this header; presumably the two macros are
 * mutually exclusive - confirm.
 */
15992 static inline void hpm_nn_softmax_s8_hp(const int8_t *in_tensor,
15993  const int32_t in_tensor_row,
15994  const int32_t in_tensor_col,
15995  const int32_t scale,
15996  const int32_t lshift,
15997  const int32_t diff_min,
15998  int8_t *out_tensor)
15999 {
16000 #if defined(__zcc__)
/* The tpt entry point takes the output tensor first. */
16001  tpt_softmax_s8_hp(out_tensor, in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16002  diff_min);
16003 #else
16004  riscv_nn_softmax_s8_hp(in_tensor, in_tensor_row, in_tensor_col, scale, lshift,
16005  diff_min, out_tensor);
16006 #endif
16007 }
16008 #endif
16009 
16010 #endif
16011 
16012 #ifdef HPM_MATH_NN_UTIL
16013 #ifdef HPM_EN_MATH_NN_LIB
16014 #if defined(__zcc__)
16015 #include "tpt_nn_util.h"
16016 #else
16017 #include "riscv_nn_util.h"
16018 #endif
16019 
16028 #ifdef __riscv_zfh
/**
 * @brief Forward a half-precision exp request to the NN backend selected at
 *        compile time.
 *
 * Only compiled when __riscv_zfh is defined. Calls tpt_nn_exp_f16() when
 * __zcc__ is defined, otherwise riscv_nn_exp_f16(). Both calls receive
 * (in_vec, size, out_vec) unchanged.
 *
 * @param[in]  in_vec  Forwarded unchanged (read-only pointer).
 * @param[in]  size    Forwarded unchanged.
 * @param[out] out_vec Forwarded unchanged.
 * @return The value returned by the selected backend function.
 *
 * NOTE(review): presumably computes an element-wise exponential over size
 * elements and returns a status code - not visible here; confirm.
 */
16037 static inline int32_t hpm_nn_exp_f16(const float16_t *in_vec,
16038  const uint32_t size,
16039  float16_t *out_vec)
16040 {
16041 #if defined(__zcc__)
16042  return tpt_nn_exp_f16(in_vec, size, out_vec);
16043 #else
16044  return riscv_nn_exp_f16(in_vec, size, out_vec);
16045 #endif
16046 }
16047 #endif
16048 
/**
 * @brief Forward an int8 reshape request to the NN backend selected at compile time.
 *
 * When __zcc__ is defined, tpt_reshape_s8() is called with the output pointer
 * first: (out_tensor, in_tensor, size). Otherwise riscv_nn_reshape_s8() is
 * called with (in_tensor, out_tensor, size). Nothing is returned.
 *
 * @param[in]  in_tensor  Forwarded unchanged (read-only pointer).
 * @param[out] out_tensor Forwarded unchanged.
 * @param[in]  size       Forwarded unchanged.
 *
 * NOTE(review): presumably copies size bytes from in_tensor to out_tensor -
 * the unit of size is not visible here; confirm.
 */
16065 static inline void hpm_nn_reshape_s8(const int8_t *in_tensor,
16066  int8_t *out_tensor,
16067  const uint32_t size)
16068 {
16069 #if defined(__zcc__)
/* The tpt entry point takes the destination first. */
16070  tpt_reshape_s8(out_tensor, in_tensor, size);
16071 #else
16072  riscv_nn_reshape_s8(in_tensor, out_tensor, size);
16073 #endif
16074 }
16075 
/**
 * @brief Forward a q7 top-k request to the NN backend selected at compile time.
 *
 * Calls tpt_nn_top_k_s8() when __zcc__ is defined, otherwise
 * riscv_nn_top_k_s8(). Both calls receive (in_vec, size, k, val, idx)
 * unchanged.
 *
 * @param in_vec Forwarded unchanged (non-const pointer).
 * @param size   Forwarded unchanged.
 * @param k      Forwarded unchanged.
 * @param val    Forwarded unchanged.
 * @param idx    Forwarded unchanged.
 * @return The value returned by the selected backend function.
 *
 * NOTE(review): presumably writes the k largest values to val and their
 * positions to idx; whether in_vec is modified is not visible here - confirm.
 */
16095 static inline int32_t hpm_nn_top_k_s8(q7_t *in_vec,
16096  uint32_t size,
16097  uint32_t k,
16098  q7_t *val,
16099  uint32_t *idx)
16100 {
16101 #if defined(__zcc__)
16102  return tpt_nn_top_k_s8(in_vec, size, k, val, idx);
16103 #else
16104  return riscv_nn_top_k_s8(in_vec, size, k, val, idx);
16105 #endif
16106 }
16107 
16108 #ifdef __riscv_zfh
/**
 * @brief Forward a half-precision top-k request to the NN backend selected at
 *        compile time.
 *
 * Only compiled when __riscv_zfh is defined. Calls tpt_nn_top_k_f16() when
 * __zcc__ is defined, otherwise riscv_nn_top_k_f16(). Both calls receive
 * (in_vec, size, k, val, idx) unchanged.
 *
 * @param in_vec Forwarded unchanged (non-const pointer).
 * @param size   Forwarded unchanged.
 * @param k      Forwarded unchanged.
 * @param val    Forwarded unchanged.
 * @param idx    Forwarded unchanged.
 * @return The value returned by the selected backend function.
 *
 * NOTE(review): presumably writes the k largest values to val and their
 * positions to idx; whether in_vec is modified is not visible here - confirm.
 */
16128 static inline int32_t hpm_nn_top_k_f16(float16_t *in_vec,
16129  uint32_t size,
16130  uint32_t k,
16131  float16_t *val,
16132  uint32_t *idx)
16133 {
16134 #if defined(__zcc__)
16135  return tpt_nn_top_k_f16(in_vec, size, k, val, idx);
16136 #else
16137  return riscv_nn_top_k_f16(in_vec, size, k, val, idx);
16138 #endif
16139 }
16140 #endif
16141 
16146 #endif
16147 
16148 #ifdef HPM_EN_MATH_NN_RVP32_LIB
16149 #if defined(__zcc__)
16150 #include "tpt_nn_util.h"
16151 #else
16152 #include "riscv_nn_util.h"
16153 #endif
16154 
/**
 * @brief Forward an int8 reshape request to the NN backend selected at compile
 *        time (HPM_EN_MATH_NN_RVP32_LIB build).
 *
 * When __zcc__ is defined, tpt_reshape_s8() is called with the output pointer
 * first: (out_tensor, in_tensor, size). Otherwise riscv_nn_reshape_s8() is
 * called with (in_tensor, out_tensor, size). Nothing is returned.
 *
 * @param[in]  in_tensor  Forwarded unchanged (read-only pointer).
 * @param[out] out_tensor Forwarded unchanged.
 * @param[in]  size       Forwarded unchanged.
 *
 * NOTE(review): a function with the same name and signature is also defined
 * under HPM_EN_MATH_NN_LIB in this header; presumably the two macros are
 * mutually exclusive - confirm.
 */
16171 static inline void hpm_nn_reshape_s8(const int8_t *in_tensor,
16172  int8_t *out_tensor,
16173  const uint32_t size)
16174 {
16175 #if defined(__zcc__)
/* The tpt entry point takes the destination first. */
16176  tpt_reshape_s8(out_tensor, in_tensor, size);
16177 #else
16178  riscv_nn_reshape_s8(in_tensor, out_tensor, size);
16179 #endif
16180 }
16181 
16182 #endif
16183 
16188 #endif
16189 
16190 #ifdef __cplusplus
16191 }
16192 #endif
16193 #endif
#define HPM_FFA
Definition: hpm_soc.h:396
static void hpm_dsp_and_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise AND of two u32 vectors.
Definition: hpm_math.h:1998
static void hpm_dsp_and_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise AND of two u8 vectors.
Definition: hpm_math.h:2016
static void hpm_dsp_clip_q31(q31_t *src, q31_t *dst, q31_t low, q31_t high, uint32_t size)
Elementwise clipping of q31 function.
Definition: hpm_math.h:1927
static void hpm_dsp_clip_f32(float32_t *src, float32_t *dst, float32_t low, float32_t high, uint32_t size)
Elementwise clipping of f32 function.
Definition: hpm_math.h:1908
static void hpm_dsp_clip_q7(q7_t *src, q7_t *dst, q7_t low, q7_t high, uint32_t size)
Elementwise clipping of q7 function.
Definition: hpm_math.h:1965
static void hpm_dsp_clip_q15(q15_t *src, q15_t *dst, q15_t low, q15_t high, uint32_t size)
Elementwise clipping of q15 function.
Definition: hpm_math.h:1946
static void hpm_dsp_not_u16(u16_t *src, u16_t *dst, uint32_t size)
Compute the logical bitwise NOT of u16 vector.
Definition: hpm_math.h:2203
static void hpm_dsp_not_u32(u32_t *src, u32_t *dst, uint32_t size)
Compute the logical bitwise NOT of u32 vector.
Definition: hpm_math.h:2186
static void hpm_dsp_not_u8(u8_t *src, u8_t *dst, uint32_t size)
Compute the logical bitwise NOT of u8 vector.
Definition: hpm_math.h:2220
static void hpm_dsp_or_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise OR of two u8 vectors.
Definition: hpm_math.h:2085
static void hpm_dsp_or_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise OR of two u16 vectors.
Definition: hpm_math.h:2067
static void hpm_dsp_or_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise OR of two u32 vectors.
Definition: hpm_math.h:2049
static void hpm_dsp_xor_u32(u32_t *src1, u32_t *src2, u32_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u32 vectors.
Definition: hpm_math.h:2118
static void hpm_dsp_xor_u16(u16_t *src1, u16_t *src2, u16_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u16 vectors.
Definition: hpm_math.h:2136
static void hpm_dsp_xor_u8(u8_t *src1, u8_t *src2, u8_t *dst, uint32_t size)
Compute the logical bitwise XOR of two u8 vectors.
Definition: hpm_math.h:2154
static void hpm_dsp_add_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Addition of U8 vectors.
Definition: hpm_math.h:1119
static void hpm_dsp_sub_u8_q7(uint8_t *src1, uint8_t *src2, q7_t *dst, uint32_t size)
Subtraction of u8 vectors.
Definition: hpm_math.h:1218
static q31_t hpm_dsp_div_q31(q31_t src1, q31_t src2)
Division of q31 inputs.
Definition: hpm_math.h:1345
static void hpm_dsp_mul_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Multiplication of q15 vectors.
Definition: hpm_math.h:1273
static uint32_t hpm_dsp_dprod_u8(uint8_t *src1, uint8_t *src2, uint32_t size)
Dot product of U8 vectors.
Definition: hpm_math.h:1606
static void hpm_dsp_scale_q31(q31_t *src, q31_t scalefract, int8_t shift, q31_t *dst, uint32_t size)
Multiply a q31 vector by a q31 scale.
Definition: hpm_math.h:1739
static void hpm_dsp_add_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Addition of q15 vectors.
Definition: hpm_math.h:1079
static void hpm_dsp_offset_q7(q7_t *src, q7_t offset, q7_t *dst, uint32_t size)
The offset of q7 vectors.
Definition: hpm_math.h:1681
static void hpm_dsp_sub_q15(q15_t *src1, q15_t *src2, q15_t *dst, uint32_t size)
Subtraction of q15 vectors.
Definition: hpm_math.h:1178
static q31_t hpm_dsp_div_s64_u32(q63_t src1, uint32_t src2)
Division of q63 inputs divided by a positive 32 bits.
Definition: hpm_math.h:1362
static void hpm_dsp_mul_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Multiplication of q31 vectors.
Definition: hpm_math.h:1253
static q31_t hpm_dsp_div_u64_u32(uint64_t src1, uint32_t src2)
Division of positive 64-bits inputs divided by a positive 32-bits.
Definition: hpm_math.h:1379
static void hpm_dsp_abs_q7(q7_t *src, q7_t *dst, uint32_t size)
Absolute value of q7 vectors.
Definition: hpm_math.h:1020
static void hpm_dsp_neg_q15(q15_t *src, q15_t *dst, uint32_t size)
Negation of q15 vectors.
Definition: hpm_math.h:1437
static q63_t hpm_dsp_dprod_q31(q31_t *src1, q31_t *src2, uint32_t size)
Dot product of q31 vectors.
Definition: hpm_math.h:1501
static void hpm_dsp_offset_f32(float32_t *src, float32_t offset, float32_t *dst, uint32_t size)
The offset of floating-point vectors.
Definition: hpm_math.h:1621
static void hpm_dsp_scale_q7(q7_t *src, q7_t scalefract, int8_t shift, q7_t *dst, uint32_t size)
Multiply a q7 vector by a q7 scale.
Definition: hpm_math.h:1785
static void hpm_dsp_add_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Addition of q7 vectors.
Definition: hpm_math.h:1099
static void hpm_dsp_offset_u8(uint8_t *src, q7_t offset, uint8_t *dst, uint32_t size)
The offset of U8 vectors.
Definition: hpm_math.h:1701
static void hpm_dsp_sub_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Subtraction of q7 vectors.
Definition: hpm_math.h:1198
static void hpm_dsp_shift_q31(q31_t *src, int8_t shift, q31_t *dst, uint32_t size)
Shifts a q31 vector with a specified shift number.
Definition: hpm_math.h:1846
static void hpm_dsp_scale_q15(q15_t *src, q15_t scalefract, int8_t shift, q15_t *dst, uint32_t size)
Multiply a q15 vector by a q15 scale.
Definition: hpm_math.h:1762
static void hpm_dsp_add_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Addition of q31 vectors.
Definition: hpm_math.h:1059
static void hpm_dsp_offset_q15(q15_t *src, q15_t offset, q15_t *dst, uint32_t size)
The offset of q15 vectors.
Definition: hpm_math.h:1661
static void hpm_dsp_scale_f32(float32_t *src, float32_t scale, float32_t *dst, uint32_t size)
Multiply a floating-point vector by a floating-point scale.
Definition: hpm_math.h:1716
static void hpm_dsp_offset_q31(q31_t *src, q31_t offset, q31_t *dst, uint32_t size)
The offset of q31 vectors.
Definition: hpm_math.h:1641
static void hpm_dsp_add_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Addition of floating-point vectors.
Definition: hpm_math.h:1039
static void hpm_dsp_sub_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Subtraction of floating-point vectors.
Definition: hpm_math.h:1138
static void hpm_dsp_neg_f32(float32_t *src, float32_t *dst, uint32_t size)
Negation of floating-point vectors.
Definition: hpm_math.h:1397
static void hpm_dsp_neg_q31(q31_t *src, q31_t *dst, uint32_t size)
Negation of q31 vectors.
Definition: hpm_math.h:1417
static void hpm_dsp_neg_q7(q7_t *src, q7_t *dst, uint32_t size)
Negation of q7 vectors.
Definition: hpm_math.h:1457
static void hpm_dsp_shift_q7(q7_t *src, int8_t shift, q7_t *dst, uint32_t size)
Shifts a q7 vector with a specified shift number.
Definition: hpm_math.h:1867
static q31_t hpm_dsp_dprod_q7(q7_t *src1, q7_t *src2, uint32_t size)
Dot product of q7 vectors.
Definition: hpm_math.h:1566
static q63_t hpm_dsp_dprod_q15(q15_t *src1, q15_t *src2, uint32_t size)
Dot product of q15 vectors.
Definition: hpm_math.h:1524
static q31_t hpm_dsp_dprod_q7xq15(q7_t *src1, q15_t *src2, uint32_t size)
Dot product of q7 * q15 vectors.
Definition: hpm_math.h:1589
static float32_t hpm_dsp_dprod_f32(float32_t *src1, float32_t *src2, uint32_t size)
Dot product of floating-point vectors.
Definition: hpm_math.h:1476
static void hpm_dsp_scale_u8(uint8_t *src, q7_t scalefract, int8_t shift, uint8_t *dst, uint32_t size)
Multiply a u8 vector by a q7 scale.
Definition: hpm_math.h:1807
static void hpm_dsp_sub_q31(q31_t *src1, q31_t *src2, q31_t *dst, uint32_t size)
Subtraction of q31 vectors.
Definition: hpm_math.h:1158
static q31_t hpm_dsp_dprod_u8xq15(uint8_t *src1, q15_t *src2, uint32_t size)
Dot product of u8 * q15 vectors.
Definition: hpm_math.h:1548
static void hpm_dsp_abs_q31(q31_t *src, q31_t *dst, uint32_t size)
Absolute value of q31 vectors.
Definition: hpm_math.h:979
static void hpm_dsp_mul_u8_u16(uint8_t *src1, uint8_t *src2, uint16_t *dst, uint32_t size)
Multiplication of u8 vectors.
Definition: hpm_math.h:1313
static void hpm_dsp_shift_q15(q15_t *src, int8_t shift, q15_t *dst, uint32_t size)
Shifts a q15 vector with a specified shift number.
Definition: hpm_math.h:1825
static void hpm_dsp_div_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Division of floating-point vectors.
Definition: hpm_math.h:1328
static void hpm_dsp_abs_f32(float32_t *src, float32_t *dst, uint32_t size)
Absolute value of floating-point vectors.
Definition: hpm_math.h:959
static void hpm_dsp_mul_f32(float32_t *src1, float32_t *src2, float32_t *dst, uint32_t size)
Multiplication of floating-point vectors.
Definition: hpm_math.h:1233
static void hpm_dsp_abs_q15(q15_t *src, q15_t *dst, uint32_t size)
Absolute value of q15 vectors.
Definition: hpm_math.h:1000
static void hpm_dsp_shift_u8(uint8_t *src, int8_t shift, uint8_t *dst, uint32_t size)
Shifts a u8 vector for a specified shift number.
Definition: hpm_math.h:1888
static void hpm_dsp_mul_q7(q7_t *src1, q7_t *src2, q7_t *dst, uint32_t size)
Multiplication of q7 vectors.
Definition: hpm_math.h:1293
static void hpm_dsp_cmul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t size)
Multiply two floating-point complex vectors.
Definition: hpm_math.h:2583
static void hpm_dsp_cconj_q15(const q15_t *src, q15_t *dst, uint32_t size)
Conjugate the q15 complex vector.
Definition: hpm_math.h:2318
static void hpm_dsp_cmul_real_f32(const float32_t *src, const float32_t *real, float32_t *dst, uint32_t size)
Multiply the floating-point complex vector by a real vector.
Definition: hpm_math.h:2644
static void hpm_dsp_cdprod_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q31_t *dst)
Compute the dot product of the q31 complex vector.
Definition: hpm_math.h:2428
static void hpm_dsp_cconj_f32(const float32_t *src, float32_t *dst, uint32_t size)
Conjugate the floating-point complex vector.
Definition: hpm_math.h:2298
static void hpm_dsp_cmul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t size)
Multiply two q15 complex vectors.
Definition: hpm_math.h:2604
static void hpm_dsp_cmag_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude of the floating-point complex vector.
Definition: hpm_math.h:2465
static void hpm_dsp_cmag_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude of the q15 complex vector.
Definition: hpm_math.h:2485
static void hpm_dsp_cmag_sqr_f32(const float32_t *src, float32_t *dst, uint32_t size)
Compute the magnitude squared of the floating-point complex vector.
Definition: hpm_math.h:2524
static void hpm_dsp_cmag_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude of the q31 complex vector.
Definition: hpm_math.h:2505
static void hpm_dsp_cmul_real_q31(const q31_t *src, const q31_t *real, q31_t *dst, uint32_t size)
Multiply the q31 complex vector by a real vector.
Definition: hpm_math.h:2684
static void hpm_dsp_cconj_q31(const q31_t *src, q31_t *dst, uint32_t size)
Conjugate the q31 complex vector.
Definition: hpm_math.h:2338
static void hpm_dsp_cdprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *dst)
Compute the dot product of the floating-point complex vector.
Definition: hpm_math.h:2357
static void hpm_dsp_cdprod_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q15_t *dst)
Compute the dot product of the q15 complex vector.
Definition: hpm_math.h:2393
static void hpm_dsp_cdprod_typ2_q31(const q31_t *src1, const q31_t *src2, uint32_t size, q63_t *rout, q63_t *iout)
Compute the dot product type2 of the q31 complex vector.
Definition: hpm_math.h:2447
static void hpm_dsp_cmag_sqr_q15(const q15_t *src, q15_t *dst, uint32_t size)
Compute the magnitude squared of the q15 complex vector.
Definition: hpm_math.h:2544
static void hpm_dsp_cmul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size)
Multiply two q31 complex vectors.
Definition: hpm_math.h:2625
static void hpm_dsp_cmag_sqr_q31(const q31_t *src, q31_t *dst, uint32_t size)
Compute the magnitude squared of the q31 complex vector.
Definition: hpm_math.h:2564
static void hpm_dsp_cdprod_typ2_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *rout, float32_t *iout)
Compute the dot product type2 of the floating-point complex vector.
Definition: hpm_math.h:2372
static void hpm_dsp_cdprod_typ2_q15(const q15_t *src1, const q15_t *src2, uint32_t size, q31_t *rout, q31_t *iout)
Compute the dot product type2 of the q15 complex vector.
Definition: hpm_math.h:2411
static void hpm_dsp_cmul_real_q15(const q15_t *src, const q15_t *real, q15_t *dst, uint32_t size)
Multiply the q15 complex vector by a real vector.
Definition: hpm_math.h:2664
static q15_t hpm_dsp_pid_q15(riscv_dsp_pid_q15_t *instance, q15_t src)
Definition: hpm_math.h:2910
static void hpm_dsp_init_pid_q15(riscv_dsp_pid_q15_t *instance, int32_t set)
PID initialization control function of Q15 formats.
Definition: hpm_math.h:2928
static void hpm_dsp_inv_park_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta, q31_t sin, q31_t cos)
Inverse Park transform of q31 input.
Definition: hpm_math.h:2839
static void hpm_dsp_park_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b, q31_t sin, q31_t cos)
Park transform of q31 input.
Definition: hpm_math.h:2804
static void hpm_dsp_init_pid_f32(riscv_dsp_pid_f32_t *instance, int32_t set)
PID initialization control function of floating-point formats.
Definition: hpm_math.h:2870
static void hpm_dsp_park_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b, float32_t sin, float32_t cos)
Park transform of floating-point input.
Definition: hpm_math.h:2785
static void hpm_dsp_inv_clarke_q31(q31_t alpha, q31_t beta, q31_t *a, q31_t *b)
Inverse Clarke transform of q31 input.
Definition: hpm_math.h:2768
static void hpm_dsp_init_pid_q31(riscv_dsp_pid_q31_t *instance, int32_t set)
PID initialization control function of Q31 formats.
Definition: hpm_math.h:2903
static void hpm_dsp_inv_clarke_f32(float32_t alpha, float32_t beta, float32_t *a, float32_t *b)
Inverse Clarke transform of floating-point input.
Definition: hpm_math.h:2752
static q31_t hpm_dsp_pid_q31(riscv_dsp_pid_q31_t *instance, q31_t src)
PID control of Q31 input.
Definition: hpm_math.h:2884
static void hpm_dsp_clarke_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta)
Clarke transform of floating-point input.
Definition: hpm_math.h:2721
static void hpm_dsp_inv_park_f32(float32_t a, float32_t b, float32_t *alpha, float32_t *beta, float32_t sin, float32_t cos)
Inverse Park transform of floating-point input.
Definition: hpm_math.h:2821
static float32_t hpm_dsp_pid_f32(riscv_dsp_pid_f32_t *instance, float32_t src)
PID control of floating-point input.
Definition: hpm_math.h:2852
static void hpm_dsp_clarke_q31(q31_t a, q31_t b, q31_t *alpha, q31_t *beta)
Clarke transform of q31 input.
Definition: hpm_math.h:2737
static float32_t hpm_dsp_dist_euclidean_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Euclidean distance between two vectors.
Definition: hpm_math.h:3071
static float32_t hpm_dsp_dist_city_block_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cityblock (Manhattan) distance between two vectors.
Definition: hpm_math.h:3017
static float32_t hpm_dsp_bdist_sokal_sneath_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Sneath distance between two vectors.
Definition: hpm_math.h:3216
static float32_t hpm_dsp_dist_bray_curtis_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Bray-Curtis distance between two vectors.
Definition: hpm_math.h:2963
static float32_t hpm_dsp_bdist_kulsinski_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Kulsinski distance between two vectors.
Definition: hpm_math.h:3180
static float32_t hpm_dsp_bdist_jaccard_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Jaccard distance between two vectors.
Definition: hpm_math.h:3162
static float32_t hpm_dsp_dist_canberra_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Canberra distance between two vectors.
Definition: hpm_math.h:2981
static float32_t hpm_dsp_bdist_hamming_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Hamming distance between two vectors.
Definition: hpm_math.h:3144
static float32_t hpm_dsp_dist_cos_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Cosine distance between two vectors.
Definition: hpm_math.h:3053
static float32_t hpm_dsp_dist_corr_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Correlation distance between two vectors.
Definition: hpm_math.h:3035
static float32_t hpm_dsp_dist_chebyshev_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Chebyshev distance between two vectors.
Definition: hpm_math.h:2999
static float32_t hpm_dsp_bdist_rogers_tanimoto_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Rogers-Tanimoto distance between two vectors.
Definition: hpm_math.h:3234
static float32_t hpm_dsp_bdist_yule_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Yule distance between two vectors.
Definition: hpm_math.h:3252
static float32_t hpm_dsp_bdist_russell_rao_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Russell-Rao distance between two vectors.
Definition: hpm_math.h:3270
static float32_t hpm_dsp_bdist_dice_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Dice distance between two vectors.
Definition: hpm_math.h:3126
static float32_t hpm_dsp_bdist_sokal_michener_u32_f32(const uint32_t *src1, const uint32_t *src2, uint32_t numofbool)
Sokal-Michener distance between two vectors.
Definition: hpm_math.h:3198
static float32_t hpm_dsp_dist_minkowski_f32(const float32_t *src1, const float32_t *src2, int32_t order, uint32_t size)
Minkowski distance between two vectors.
Definition: hpm_math.h:3108
static float32_t hpm_dsp_dist_jensen_shannon_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Jensen-Shannon distance between two vectors.
Definition: hpm_math.h:3089
#define FFA_DATA_TYPE_COMPLEX_Q31
Definition: hpm_ffa_drv.h:39
hpm_stat_t ffa_calculate_fft_blocking(FFA_Type *ptr, fft_xfer_t *fft_xfer)
Perform FFT transformation in blocking mode.
Definition: hpm_ffa_drv.c:118
#define FFA_DATA_TYPE_COMPLEX_Q15
Definition: hpm_ffa_drv.h:40
static void hpm_dsp_corr_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Correlation of the q31 vectors.
Definition: hpm_math.h:3909
static void hpm_dsp_bq_df1_32x64_q31(const riscv_dsp_bq_df1_32x64_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3975
static void hpm_dsp_lfir_f32(const riscv_dsp_lfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point lattice FIR filter.
Definition: hpm_math.h:3425
static void hpm_dsp_spafir_q15(riscv_dsp_spafir_q15_t *instance, q15_t *src, q15_t *dst, q15_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3519
static void hpm_dsp_corr_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Correlation of the q7 vectors.
Definition: hpm_math.h:3935
static void hpm_dsp_dcmfir_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3477
static void hpm_dsp_nlms_q15(riscv_dsp_nlms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Definition: hpm_math.h:3629
static void hpm_dsp_liir_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4018
static void hpm_dsp_bq_df1_f32(const riscv_dsp_bq_df1_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3945
static void hpm_dsp_spafir_f32(riscv_dsp_spafir_f32_t *instance, float32_t *src, float32_t *dst, float32_t *buf, uint32_t size)
Definition: hpm_math.h:3513
static void hpm_dsp_conv_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Convolution of the floating-point vectors.
Definition: hpm_math.h:3647
static void hpm_dsp_spafir_q7(riscv_dsp_spafir_q7_t *instance, q7_t *src, q7_t *dst, q7_t *buf1, q31_t *buf2, uint32_t size)
Definition: hpm_math.h:3531
static void hpm_dsp_fir_fast_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3391
static void hpm_dsp_bq_df2T_f64(const riscv_dsp_bq_df2T_f64_t *instance, float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:3987
static void hpm_dsp_bq_df1_fast_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3957
static void hpm_dsp_dcmfir_fast_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3489
static void hpm_dsp_liir_fast_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4012
static void hpm_dsp_lms_q31(const riscv_dsp_lms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 LMS filter.
Definition: hpm_math.h:3575
static void hpm_dsp_fir_f32(const riscv_dsp_fir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Function for the floating-point FIR filter.
Definition: hpm_math.h:3310
static void hpm_dsp_conv_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst)
Convolution of the q31 vectors.
Definition: hpm_math.h:3701
static void hpm_dsp_bq_df2T_f32(const riscv_dsp_bq_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3981
static void hpm_dsp_lms_f32(const riscv_dsp_lms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the floating-point standard LMS filter.
Definition: hpm_math.h:3552
static void hpm_dsp_corr_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst)
Correlation of the floating-point vectors.
Definition: hpm_math.h:3853
static void hpm_dsp_fir_q15(const riscv_dsp_fir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 FIR filter.
Definition: hpm_math.h:3372
static void hpm_dsp_lfir_q15(const riscv_dsp_lfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Function for the q15 lattice FIR filter.
Definition: hpm_math.h:3440
static int32_t hpm_dsp_conv_partial_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst, uint32_t startindex, uint32_t size)
Partial convolution of the q15 vectors.
Definition: hpm_math.h:3778
static void hpm_dsp_dcmfir_f32(const riscv_dsp_dcmfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3465
static void hpm_dsp_nlms_f32(riscv_dsp_nlms_f32_t *instance, float32_t *src, float32_t *ref, float32_t *dst, float32_t *err, uint32_t size)
Function for the f32 normalized LMS filter.
Definition: hpm_math.h:3609
static void hpm_dsp_upsplfir_q15(const riscv_dsp_upsplfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3501
static void hpm_dsp_conv_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Convolution of the q15 vectors.
Definition: hpm_math.h:3673
static void hpm_dsp_upsplfir_q31(const riscv_dsp_upsplfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3507
static void hpm_dsp_dcmfir_q15(const riscv_dsp_dcmfir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3471
static void hpm_dsp_dcmfir_fast_q31(const riscv_dsp_dcmfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3483
static void hpm_dsp_spafir_q31(riscv_dsp_spafir_q31_t *instance, q31_t *src, q31_t *dst, q31_t *buf, uint32_t size)
Definition: hpm_math.h:3525
static void hpm_dsp_fir_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3331
static void hpm_dsp_lfir_q31(const riscv_dsp_lfir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 lattice FIR filter.
Definition: hpm_math.h:3459
static void hpm_dsp_upsplfir_f32(const riscv_dsp_upsplfir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3495
static int32_t hpm_dsp_conv_partial_q31(q31_t *src1, uint32_t len1, q31_t *src2, uint32_t len2, q31_t *dst, uint32_t startindex, uint32_t size)
Partial convolution of the q31 vectors.
Definition: hpm_math.h:3804
static void hpm_dsp_liir_q31(const riscv_dsp_liir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:4006
static int32_t hpm_dsp_conv_partial_f32(float32_t *src1, uint32_t len1, float32_t *src2, uint32_t len2, float32_t *dst, uint32_t startindex, uint32_t size)
Partial convolution of the floating-point vectors.
Definition: hpm_math.h:3752
static void hpm_dsp_fir_fast_q31(const riscv_dsp_fir_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Function for the q31 FIR filter.
Definition: hpm_math.h:3352
static void hpm_dsp_liir_f32(const riscv_dsp_liir_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:4000
static void hpm_dsp_conv_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst)
Convolution of the q7 vectors.
Definition: hpm_math.h:3727
static void hpm_dsp_bq_df1_fast_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3969
static void hpm_dsp_bq_df1_q15(const riscv_dsp_bq_df1_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:3951
static void hpm_dsp_corr_q15(q15_t *src1, uint32_t len1, q15_t *src2, uint32_t len2, q15_t *dst)
Correlation of the q15 vectors.
Definition: hpm_math.h:3879
static void hpm_dsp_liir_fast_q15(const riscv_dsp_liir_q15_t *instance, q15_t *src, q15_t *dst, uint32_t size)
Definition: hpm_math.h:4024
static int32_t hpm_dsp_conv_partial_q7(q7_t *src1, uint32_t len1, q7_t *src2, uint32_t len2, q7_t *dst, uint32_t startindex, uint32_t size)
Partial convolution of the q7 vectors.
Definition: hpm_math.h:3830
static void hpm_dsp_bq_df1_q31(const riscv_dsp_bq_df1_q31_t *instance, q31_t *src, q31_t *dst, uint32_t size)
Definition: hpm_math.h:3963
static void hpm_dsp_lms_q15(const riscv_dsp_lms_q15_t *instance, q15_t *src, q15_t *ref, q15_t *dst, q15_t *err, uint32_t size)
Function for the q15 LMS filter.
Definition: hpm_math.h:3598
static void hpm_dsp_bq_stereo_df2T_f32(const riscv_dsp_bq_stereo_df2T_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Definition: hpm_math.h:3993
static void hpm_dsp_nlms_q31(riscv_dsp_nlms_q31_t *instance, q31_t *src, q31_t *ref, q31_t *dst, q31_t *err, uint32_t size)
Function for the q31 normalized LMS filter.
Definition: hpm_math.h:3621
static void hpm_dsp_fir_q7(const riscv_dsp_fir_q7_t *instance, q7_t *src, q7_t *dst, uint32_t size)
Function for the q7 FIR filter.
Definition: hpm_math.h:3410
static void hpm_dsp_mat_sub_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Subtraction of two floating-point matrices.
Definition: hpm_math.h:4500
static void hpm_dsp_mat_oprod_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t size1, uint32_t size2)
Outer product of two q31 matrices.
Definition: hpm_math.h:4684
static void hpm_dsp_mat_trans_q15(const q15_t *src, q15_t *dst, uint32_t row, uint32_t col)
Transpose the q15 matrix.
Definition: hpm_math.h:4595
static void hpm_dsp_mat_mul_mxv_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for f32 formats.
Definition: hpm_math.h:4718
static void hpm_dsp_mat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point matrices.
Definition: hpm_math.h:4170
static void hpm_dsp_cmat_mul_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two floating-point complex matrices.
Definition: hpm_math.h:4201
static int32_t hpm_dsp_mat_inv_f32(float32_t *src, float32_t *dst, uint32_t size)
Compute the inverse matrix of the floating-point matrix.
Definition: hpm_math.h:4139
static void hpm_dsp_mat_mul_vxm_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t col, uint32_t col2)
Multiplication of a q7 vector by a matrix.
Definition: hpm_math.h:4378
static void hpm_dsp_mat_add_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Addition of two q31 matrices.
Definition: hpm_math.h:4120
static void hpm_dsp_mat_mul_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4181
static void hpm_dsp_mat_trans_u8(const uint8_t *src, uint8_t *dst, uint32_t row, uint32_t col)
Transpose the u8 matrix.
Definition: hpm_math.h:4631
static void hpm_dsp_mat_scale_q15(const q15_t *src, q15_t scale_fract, int32_t shift, q15_t *dst, uint32_t row, uint32_t col)
Multiply a q15 matrix by a scale value.
Definition: hpm_math.h:4433
static void hpm_dsp_mat_mul_fast_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4302
static void hpm_dsp_mat_mul_mxv_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q15 formats.
Definition: hpm_math.h:4738
static void hpm_dsp_mat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 matrices.
Definition: hpm_math.h:4292
static void hpm_dsp_mat_trans_f64(const float64_t *src, float64_t *dst, uint32_t row, uint32_t col)
Transpose the double-precision floating-point matrices.
Definition: hpm_math.h:4563
static void hpm_dsp_cmat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 complex matrices.
Definition: hpm_math.h:4265
static void hpm_dsp_cmat_mul_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q31 complex matrices.
Definition: hpm_math.h:4329
static void hpm_dsp_mat_trans_f32(const float32_t *src, float32_t *dst, uint32_t row, uint32_t col)
Transpose the floating-point matrix.
Definition: hpm_math.h:4581
static void hpm_dsp_mat_mul_mxv_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q31 formats.
Definition: hpm_math.h:4758
static void hpm_dsp_mat_mul_fast_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Definition: hpm_math.h:4238
static void hpm_dsp_mat_trans_q31(const q31_t *src, q31_t *dst, uint32_t row, uint32_t col)
Transpose the q31 matrix.
Definition: hpm_math.h:4613
static void hpm_dsp_mat_sub_f64(const float64_t *src1, const float64_t *src2, float64_t *dst, uint32_t row, uint32_t col)
Subtraction of two double-precision floating-point matrices.
Definition: hpm_math.h:4480
static void hpm_dsp_mat_add_f32(const float32_t *src1, const float32_t *src2, float32_t *dst, uint32_t row, uint32_t col)
Addition of two floating-point matrices.
Definition: hpm_math.h:4078
static void hpm_dsp_mat_add_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Addition of two q15 matrices.
Definition: hpm_math.h:4099
static void hpm_dsp_mat_scale_f32(const float32_t *src, float32_t scale, float32_t *dst, uint32_t row, uint32_t col)
Multiply a floating-point matrix by a scale value.
Definition: hpm_math.h:4408
static void hpm_dsp_mat_mul_mxv_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col)
Matrix multiply vector for q7 formats.
Definition: hpm_math.h:4778
static void hpm_dsp_mat_mul_q7(const q7_t *src1, const q7_t *src2, q7_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q7 matrices.
Definition: hpm_math.h:4356
static int32_t hpm_dsp_mat_pwr2_cache_f64(const float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4392
static void hpm_dsp_mat_mul_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col, uint32_t col2)
Multiplication of two q15 matrices.
Definition: hpm_math.h:4228
static void hpm_dsp_mat_scale_q31(const q31_t *src, q31_t scale_fract, int32_t shift, q31_t *dst, uint32_t row, uint32_t col)
Multiply a q31 matrix by a scale value.
Definition: hpm_math.h:4458
static void hpm_dsp_mat_trans_q7(const q7_t *src, q7_t *dst, uint32_t row, uint32_t col)
Transpose the q7 matrices.
Definition: hpm_math.h:4646
static void hpm_dsp_mat_sub_q31(const q31_t *src1, const q31_t *src2, q31_t *dst, uint32_t row, uint32_t col)
Subtraction of two q31 matrices.
Definition: hpm_math.h:4542
static int32_t hpm_dsp_mat_inv_f64(float64_t *src, float64_t *dst, uint32_t size)
Definition: hpm_math.h:4149
static void hpm_dsp_mat_sub_q15(const q15_t *src1, const q15_t *src2, q15_t *dst, uint32_t row, uint32_t col)
Subtraction of two q15 matrices.
Definition: hpm_math.h:4521
static void hpm_nn_activate_s16(q15_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses sigmoid or tanh function to perform activation for signed 16-bit integer input vec...
Definition: hpm_math.h:6864
static void size
Definition: hpm_math.h:6899
static void hpm_nn_leaky_relu_s8(q7_t *in_out, uint32_t size, q15_t slope)
This function uses the leaky ReLU function to perform activation for signed 8-bit integer input vecto...
static void hpm_nn_relu_s16(q15_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 16-bit integer input vectors.
Definition: hpm_math.h:6950
static void hpm_nn_activate_s8(q7_t *in_out, uint32_t size, uint16_t int_bits, riscv_nn_activation_fun act_fun)
This function uses the sigmoid or tanh function to perform activation for signed 8-bit integer input ...
Definition: hpm_math.h:6837
static void slope
Definition: hpm_math.h:6899
static void hpm_nn_relu_any_s8(q7_t *data, uint16_t size, q7_t max_val)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6910
static void hpm_nn_relu_s8(q7_t *in_out, uint32_t size)
This function uses the ReLU function to perform activation for signed 8-bit integer input vectors.
Definition: hpm_math.h:6935
static void hpm_nn_add_s8_sym(const q7_t *in_tensor1, const q7_t *in_tensor2, const int16_t *scale1, const int16_t *scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7058
static int hpm_nn_ew_add_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_scale1, const int32_t in_rshift1, const int32_t in_offset2, const int32_t in_scale2, const int32_t in_rshift2, const int32_t lshift, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_rshift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise addition for signed 8-bit integer input vectors.
Definition: hpm_math.h:7166
static int hpm_nn_ew_mul_s8_asym(const int8_t *in_tensor1, const int8_t *in_tensor2, const int32_t in_offset1, const int32_t in_offset2, int8_t *out, const int32_t out_offset, const int32_t out_scale, const int32_t out_shift, const int32_t act_min, const int32_t act_max, const uint32_t size)
This function performs element-wise multiplication for signed 8-bit integer input vectors.
Definition: hpm_math.h:7235
static void hpm_nn_add_s8_sym_round(const q7_t *in_tensor1, const q7_t *in_tensor2, const uint32_t scale1, const uint32_t scale2, const uint32_t size, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out)
This function performs element-wise addition for signed 8-bit integer input vectors with two-stage sh...
Definition: hpm_math.h:7096
static void hpm_nn_concate_s8_z(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_z, const uint32_t out_offset_z)
This function concatenates the int8_t/uint8_t input tensor along the z-axis with the output tensor.
Definition: hpm_math.h:7495
static void hpm_nn_concate_s8_x(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_x, const uint32_t out_offset_x)
This function concatenates the int8_t/uint8_t input tensor along the x-axis with the output tensor.
Definition: hpm_math.h:7425
static void hpm_nn_concate_s8_y(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint16_t out_tensor_y, const uint32_t out_offset_y)
This function concatenates the int8_t/uint8_t input tensor along the y-axis with the output tensor.
Definition: hpm_math.h:7460
static void hpm_nn_concate_s8_w(const int8_t *in_tensor, const uint16_t in_tensor_x, const uint16_t in_tensor_y, const uint16_t in_tensor_z, const uint16_t in_tensor_w, int8_t *out_tensor, const uint32_t out_offset_w)
This function concatenates the int8_t/uint8_t input tensor along the w-axis with the output tensor.
Definition: hpm_math.h:7390
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9630
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12065
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11993
static int32_t hpm_nn_conv_dw_HWC_u8_u8_u8_asym_bias_any(const uint8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint8_t *ker_weight, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t in_offset, const int32_t ker_offset, const int32_t out_offset, uint8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t act_min, const int32_t act_max, const int32_t out_shift, const int32_t out_scale)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:13252
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with bias inputs and ...
Definition: hpm_math.h:10040
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with symmetric...
Definition: hpm_math.h:11742
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:9064
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:9303
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10276
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution with shift-based quantization on th...
Definition: hpm_math.h:8483
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11856
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:9223
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10917
static int32_t hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the needed size, in bytes, by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12798
static int32_t hpm_nn_conv_1x1_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 8-bit int...
Definition: hpm_math.h:8902
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and ...
Definition: hpm_math.h:12646
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12211
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:8655
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution with shift-based quantization on the ou...
Definition: hpm_math.h:8301
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernels convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:9383
static int32_t hpm_nn_conv_HWC_u8_s8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 8-...
Definition: hpm_math.h:9924
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:9982
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11511
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11393
static int32_t hpm_nn_conv_HWC_u8_s16_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs and signed 16...
Definition: hpm_math.h:9691
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:9144
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:11920
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution with shift-based quantization on the outputs.
Definition: hpm_math.h:7860
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with symmetric quan...
Definition: hpm_math.h:10448
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ch_mult, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y ...
Definition: hpm_math.h:13047
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution for RGB images with shift-based quantization ...
Definition: hpm_math.h:7702
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9866
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10505
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:12353
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:10562
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution in any x and y dimensions with shift-bas...
Definition: hpm_math.h:8133
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9808
static int32_t hpm_nn_conv_HWC_u8_s16_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 16-bit integer o...
Definition: hpm_math.h:11268
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10217
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:11799
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs with bias inpu...
Definition: hpm_math.h:11452
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12909
static int32_t hpm_nn_conv_dw_HWC_3x3_s8_s8_s8_asym_bias_any(const int8_t *in_tensor, const int32_t in_tensor_dim_x, const int32_t in_tensor_dim_y, const int32_t in_tensor_ch, const int8_t *ker_weight, const int32_t out_tensor_ch, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_tensor_dim_x, const int32_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const int32_t dilation_x, const int32_t dilation_y, int16_t *tmp_buf)
This function performs depthwise 3x3 kernel convolution for signed 8-bit integer inputs/outputs in a...
Definition: hpm_math.h:12961
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_bias(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with bias inputs...
Definition: hpm_math.h:11334
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:10771
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10099
static void hpm_nn_conv_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer convolution in any x and y dimensions with shift-based qu...
Definition: hpm_math.h:7950
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:10626
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:12698
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs with bias inputs an...
Definition: hpm_math.h:10158
static int32_t hpm_nn_conv_1x1_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for signed 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:8739
static int32_t hpm_nn_conv_dw_HWC_u8_u8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12423
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_bias_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12138
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias_fast_any(const q15_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 16-bit integer convolution in any x and y dimensions with shift-ba...
Definition: hpm_math.h:8396
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10391
static int32_t hpm_nn_conv_dw_HWC_u8_s8_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 8-bit integ...
Definition: hpm_math.h:12494
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with sy...
Definition: hpm_math.h:9750
static int32_t hpm_nn_conv_1x1_HWC_u8_s16_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for unsigned 8-bit integer inputs and signed 16-bit in...
Definition: hpm_math.h:8984
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast signed 8-bit integer convolution for RGB images with shift-based quantiza...
Definition: hpm_math.h:7781
static int32_t hpm_nn_conv_dw_HWC_s8_s16_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs and signed 16-bit intege...
Definition: hpm_math.h:11685
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sft_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 8-bit integer depthwise convolution in any x and y dimensions with shif...
Definition: hpm_math.h:8574
static int32_t hpm_nn_conv_HWC_u8_u8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs/outputs in any x and y dime...
Definition: hpm_math.h:11128
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:11198
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any_get_buffer_size(const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y)
This function is used to get the size, in bytes, needed by the input temporary buffer of riscv_nn_con...
Definition: hpm_math.h:13199
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_asym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t dilation_x, const uint16_t dilation_y, q15_t *in_tmp_buf)
This function performs fast depthwise convolution for signed 8-bit integer inputs/outputs in any x an...
Definition: hpm_math.h:13142
static int32_t hpm_nn_conv_1x1_HWC_u8_u8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs 1x1 kernel convolution for unsigned 8-bit integer inputs/outputs in any x and...
Definition: hpm_math.h:8820
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sft_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs fast signed 8-bit integer convolution with shift-based quantization on the out...
Definition: hpm_math.h:8038
static int32_t hpm_nn_conv_HWC_s8_s16_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs and signed 16-b...
Definition: hpm_math.h:9510
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs with symmetric q...
Definition: hpm_math.h:11628
static int hpm_nn_conv_1xn_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t pad_x, const uint16_t stride_x, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, q15_t *in_tmp_buf)
This function performs 1xn kernel convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:12747
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_bias(const u8_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:11570
static int32_t hpm_nn_conv_HWC_s16_s16_s16_sft_bias(const q15_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q15_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q15_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs signed 16-bit integer convolution with shift-based quantization on the outputs...
Definition: hpm_math.h:8220
static int32_t hpm_nn_conv_HWC_u8_s8_s8_sym_bias_fast_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for unsigned 8-bit integer inputs and signed 8-bit integer ou...
Definition: hpm_math.h:10844
static int32_t hpm_nn_conv_dw_HWC_s8_s8_s8_sym_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for signed 8-bit integer inputs/outputs in any x and y d...
Definition: hpm_math.h:12282
static int32_t hpm_nn_conv_HWC_u8_u8_s8_RGB_sym_bias_fast(const u8_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for unsigned 8-bit integer inputs/outputs with ...
Definition: hpm_math.h:9570
static int32_t hpm_nn_conv_dw_HWC_u8_s16_s8_sym_any(const u8_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs depthwise convolution for unsigned 8-bit integer inputs and signed 16-bit inte...
Definition: hpm_math.h:12565
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:11058
static int32_t hpm_nn_conv_HWC_s8_s8_s8_RGB_sym_bias_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf, q15_t *wt_tmp_buf)
This function performs fast convolution on RGB images for signed 8-bit integer inputs/outputs with bi...
Definition: hpm_math.h:9449
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs in any x and y dimens...
Definition: hpm_math.h:10988
static int32_t hpm_nn_conv_1x1_HWC_s8_s8_s8_sft_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_lshift, const uint16_t out_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf, q7_t *tmp_buf)
This function performs 1x1 kernels convolution for signed 8-bit integer inputs/outputs in any x and y...
Definition: hpm_math.h:7615
static int32_t hpm_nn_conv_HWC_s8_s8_s8_sym_fast(const q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_tensor, const uint16_t out_tensor_dim, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs/outputs with symmetric quanti...
Definition: hpm_math.h:10334
static int32_t hpm_nn_conv_HWC_s8_s16_s8_sym_bias_fast_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const q31_t *bias, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_tensor, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs fast convolution for signed 8-bit integer inputs and signed 16-bit integer out...
Definition: hpm_math.h:10699
static int32_t hpm_nn_conv_HWC_s8_s8_s8_asym_bias_any(const q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t in_tensor_group, const q7_t *ker_weight, const uint16_t out_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, q7_t *out_tensor, const int32_t *out_shift, const int32_t *out_scale, const int32_t out_offset, const int32_t in_offset, const int32_t act_min, const int32_t act_max, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q15_t *in_tmp_buf)
This function performs convolution for signed 8-bit integer inputs/outputs in any x and y dimensions ...
Definition: hpm_math.h:12852
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with shift-based quantizati...
Definition: hpm_math.h:14140
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14490
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14886
static int32_t out_vec
Definition: hpm_math.h:14079
static int32_t in_tmp_buf
Definition: hpm_math.h:14080
static int32_t hpm_nn_fc_s8_s16_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14577
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14352
static int32_t hpm_nn_fc_u8_s16_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:15157
static int32_t bias
Definition: hpm_math.h:14079
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14261
static int32_t hpm_nn_fc_mat_vec_s16_s16_s8_sft_bias(const q15_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q15_t *out_vec, q15_t *tmp_buf)
This function multiplies a signed 16-bit integer input vector by a signed 8-bit integer weight matrix...
Definition: hpm_math.h:14219
static void hpm_nn_fc_mat_vec_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for riscv_nn_fc_mat_vec_s16_s16_s8_sft_bias_fast.
Definition: hpm_math.h:15231
static int32_t hpm_nn_fc_s8_s16_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:15023
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias(const int8_t *in_vec, const int8_t *wt_mat, const uint16_t in_vec_col, const uint16_t wt_mat_row, const uint16_t in_vec_group, const int32_t in_offset, const int32_t wt_offset, const int32_t out_scale, const int32_t out_shift, const int32_t out_offset, const int32_t *bias, int8_t *out_vec, const int32_t act_min, const int32_t act_max, q15_t *tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with bias inputs and asymmet...
Definition: hpm_math.h:15273
static int32_t hpm_nn_fc_s8_s8_s8_sym(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with symmetric quant...
Definition: hpm_math.h:14535
static void hpm_nn_fc_s8_wt_converter(const q7_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q7_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 8-bit weight data and name...
Definition: hpm_math.h:15188
static int32_t hpm_nn_fc_u8_s16_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14933
static int32_t hpm_nn_fc_s8_s8_s8_asym_bias_get_buffer_size(const uint16_t in_vec_col)
This function is used to get the needed size, in bytes, by the temporary buffer of riscv_nn_fc_s8_s8_...
Definition: hpm_math.h:15312
static int32_t out_rshift
Definition: hpm_math.h:14079
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14839
static int32_t hpm_nn_fc_s8_s8_s8_sym_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with interleaved mul...
Definition: hpm_math.h:14978
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with interleaved multiplicat...
Definition: hpm_math.h:14105
static int32_t hpm_nn_fc_u8_s8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14444
static int32_t hpm_nn_fc_s8_s8_s8_sft_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q7_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs with shift-based quantizatio...
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs,...
Definition: hpm_math.h:14746
static int32_t wt_row_num
Definition: hpm_math.h:14078
static int32_t hpm_nn_fc_s8_s8_s8_sym_bias(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs/outputs with bias inputs and...
Definition: hpm_math.h:14306
static int32_t hpm_nn_fc_u8_u8_s8_sym_bias(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with bias inputs a...
Definition: hpm_math.h:14398
static int32_t bias_lshift
Definition: hpm_math.h:14079
static void hpm_nn_fc_s16_wt_converter(const q15_t *wt_mat, const uint32_t size, const uint32_t wt_row_num, q15_t *wt_mat_out)
This is a weight converter for those fully-connected functions with signed 16-bit weight data and nam...
Definition: hpm_math.h:15210
static int32_t wt_mat
Definition: hpm_math.h:14078
static int32_t hpm_nn_fc_u8_s16_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 16-bit integer ...
Definition: hpm_math.h:14703
static int32_t hpm_nn_fc_u8_u8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with symmetric qua...
Definition: hpm_math.h:14619
static int32_t hpm_nn_fc_u8_s8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:15112
static int32_t hpm_nn_fc_u8_u8_s8_sym_fast(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, u8_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs/outputs with interleaved m...
Definition: hpm_math.h:15067
static int32_t hpm_nn_fc_s8_s16_s8_sym_bias_fast(const q7_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, const q31_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 8-bit integer inputs and signed 16-bit integer ou...
Definition: hpm_math.h:14793
static int32_t hpm_nn_fc_s16_s16_s16_sft_bias_fast(const q15_t *in_vec, const q15_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t bias_lshift, const uint16_t out_rshift, const q15_t *bias, q15_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for signed 16-bit integer inputs with interleaved multiplica...
Definition: hpm_math.h:14183
static int32_t hpm_nn_fc_u8_s8_s8_sym(const u8_t *in_vec, const q7_t *wt_mat, const uint16_t size, const uint16_t wt_row_num, const uint16_t pre_rshift, const uint16_t out_scale, const uint16_t post_rshift, q7_t *out_vec, q15_t *in_tmp_buf)
This is a fully connected layer function for unsigned 8-bit integer inputs and signed 8-bit integer o...
Definition: hpm_math.h:14661
static int32_t hpm_nn_avepool_HWC_s8_any_act(const int in_tensor_dim_y, const int in_tensor_dim_x, const int out_tensor_dim_y, const int out_tensor_dim_x, const int stride_y, const int stride_x, const int ker_dim_y, const int ker_dim_x, const int pad_y, const int pad_x, const int act_min, const int act_max, const int in_tensor_ch, int8_t *in_tensor, int16_t *in_tmp_buf, int8_t *out_tensor)
This is an average pooling function for S8 inputs with any x and y dimension with the activating param...
Definition: hpm_math.h:15588
static void hpm_nn_maxpool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15670
static int32_t hpm_nn_maxpool_HWC_s8_any_act(const uint16_t in_tensor_dim_y, const uint16_t in_tensor_dim_x, const uint16_t out_tensor_dim_y, const uint16_t out_tensor_dim_x, const uint16_t stride_y, const uint16_t stride_x, const uint16_t ker_dim_y, const uint16_t ker_dim_x, const uint16_t pad_y, const uint16_t pad_x, const int8_t act_min, const int8_t act_max, const uint16_t in_tensor_ch, int8_t *in_tensor, int16_t *tmp_buffer, int8_t *out_tensor)
This is a max pooling function for signed 8-bit integer inputs in any x and y dimensions with the act...
Definition: hpm_math.h:15715
static void hpm_nn_avepool_HWC_s8(q7_t *in_tensor, const uint16_t in_tensor_dim, const uint16_t in_tensor_ch, const uint16_t ker_dim, const uint16_t pad, const uint16_t stride, const uint16_t out_tensor_dim, q7_t *in_tmp_buf, q7_t *out_tensor)
This is an average pooling function for signed 8-bit integer inputs.
Definition: hpm_math.h:15466
static int32_t hpm_nn_avepool_HWC_s8_any_act_get_buffer_size(const int out_tensor_dim_x, const int in_tensor_ch)
This function is used to obtain the required size, in bytes, for the input temporary buffer of riscv_...
Definition: hpm_math.h:15630
static void hpm_nn_avepool_HWC_s8_any(q7_t *in_tensor, const uint16_t in_tensor_dim_x, const uint16_t in_tensor_dim_y, const uint16_t in_tensor_ch, const uint16_t ker_dim_x, const uint16_t ker_dim_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const uint16_t out_tensor_dim_x, const uint16_t out_tensor_dim_y, q7_t *in_tmp_buf, q7_t *out_tensor, const uint16_t out_lshift)
This is an average pooling function for signed 8-bit integer inputs in any x and y dimensions.
Definition: hpm_math.h:15531
static void hpm_nn_softmax_s8_hp(const int8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, int8_t *out_tensor)
This is a softmax function for signed 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15917
static void hpm_nn_softmax_u8_hp(const uint8_t *in_tensor, const int32_t in_tensor_row, const int32_t in_tensor_col, const int32_t scale, const int32_t lshift, const int32_t diff_min, uint8_t *out_tensor)
This is a softmax function for unsigned 8-bit integer input tensor with high precision algorithm.
Definition: hpm_math.h:15948
static void hpm_nn_softmax_s8_fast(const q7_t *in_vec, const uint16_t size, q7_t *out_vec)
This is a softmax function for signed 8-bit integer input vectors.
Definition: hpm_math.h:15874
static void hpm_nn_softmax_s16_fast(const q15_t *in_vec, const uint16_t size, q15_t *out_vec)
This is a softmax function for signed 16-bit integer input vectors.
Definition: hpm_math.h:15892
static int32_t hpm_nn_top_k_s8(q7_t *in_vec, uint32_t size, uint32_t k, q7_t *val, uint32_t *idx)
This function finds the k largest values and their indices from the signed 8-bit integer input vector...
Definition: hpm_math.h:16095
static void hpm_nn_reshape_s8(const int8_t *in_tensor, int8_t *out_tensor, const uint32_t size)
This function turns the input tensor into another tensor with the same data but in a different shape.
Definition: hpm_math.h:16065
static void hpm_dsp_sort_merge_init_f32(riscv_dsp_sort_merge_f32_t *instance, riscv_dsp_sort_order order, float32_t *buf)
Definition: hpm_math.h:6547
__STATIC_FORCEINLINE int32_t hpm_nn_read_s8x4_ia(const int8_t **in_s8)
Read 4 s8 from s8 pointer and post increment pointer.
Definition: hpm_math.h:6743
static void hpm_dsp_sort_f32(const riscv_dsp_sort_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Generic sorting function.
Definition: hpm_math.h:6525
#define Q31_MIN
Definition: hpm_math.h:6607
#define RIGHT_SHIFT(_shift)
Definition: hpm_math.h:6605
#define LEFT_SHIFT(_shift)
Definition: hpm_math.h:6604
__STATIC_FORCEINLINE void hpm_nn_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
Definition: hpm_math.h:6753
__STATIC_FORCEINLINE q31_t hpm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
Definition: hpm_math.h:6687
__STATIC_FORCEINLINE q31_t hpm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
Rounding divide by power of two.
Definition: hpm_math.h:6668
#define Q31_MAX
Definition: hpm_math.h:6606
static void hpm_dsp_sort_merge_f32(const riscv_dsp_sort_merge_f32_t *instance, float32_t *src, float32_t *dst, uint32_t size)
Merge sort.
Definition: hpm_math.h:6587
__STATIC_FORCEINLINE const q7_t * read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
read and expand one q7 word into two q15 words
Definition: hpm_math.h:6726
__STATIC_FORCEINLINE const q7_t * read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
read and expand one q7 word into two q15 words with reordering
Definition: hpm_math.h:6712
__STATIC_FORCEINLINE q31_t hpm_nn_read_q7x4_ia(const q7_t **in_q7)
Read 4 q7 from q7 pointer and post increment pointer.
Definition: hpm_math.h:6698
static void write_q15x2_ia(q15_t **pQ15, q31_t value)
Definition: hpm_math.h:6609
static void hpm_dsp_sort_init_f32(riscv_dsp_sort_f32_t *instance, riscv_dsp_sort_alg alg, riscv_dsp_sort_order order)
Definition: hpm_math.h:6478
__STATIC_FORCEINLINE q31_t hpm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
Saturating doubling high multiply. Result matches NEON instruction VQRDMULH.
Definition: hpm_math.h:6643
__STATIC_FORCEINLINE q31_t hpm_nn_read_q15x2_ia(const q15_t **in_q15)
Read 2 q15 elements and post increment pointer.
Definition: hpm_math.h:6625
static float32_t hpm_dsp_std_f32(const float32_t *src, uint32_t size)
Standard deviation of the floating-point vector.
Definition: hpm_math.h:565
static uint32_t hpm_dsp_gaussian_naive_bayes_est_f32(const riscv_dsp_gaussian_naivebayes_f32_t *instance, const float32_t *src, float32_t *buf)
Naive Gaussian Bayesian Estimator.
Definition: hpm_math.h:810
static float32_t hpm_dsp_var_f32(const float32_t *src, uint32_t size)
Variance of the floating-point vector.
Definition: hpm_math.h:656
static q15_t hpm_dsp_max_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum value of the q15 vector.
Definition: hpm_math.h:120
static q63_t hpm_dsp_pwr_q15(const q15_t *src, uint32_t size)
Sum of the squares of the q15 vector.
Definition: hpm_math.h:422
static q7_t hpm_dsp_mean_q7(const q7_t *src, uint32_t size)
Mean value of the q7 vector.
Definition: hpm_math.h:361
static q15_t hpm_dsp_absmin_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q15 vector.
Definition: hpm_math.h:908
static uint8_t hpm_dsp_max_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Max value of the u8 vector.
Definition: hpm_math.h:180
static q7_t hpm_dsp_absmin_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q7 vector.
Definition: hpm_math.h:922
static uint8_t hpm_dsp_min_u8(const uint8_t *src, uint32_t size, uint32_t *index)
Minimum value of the u8 vector.
Definition: hpm_math.h:275
static q7_t hpm_dsp_max_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum value of the q7 vector.
Definition: hpm_math.h:160
static float32_t hpm_dsp_absmin_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the floating-point vector.
Definition: hpm_math.h:880
static q31_t hpm_dsp_rms_q31(const q31_t *src, uint32_t size)
RMS of the q31 vector.
Definition: hpm_math.h:545
static q7_t hpm_dsp_min_q7(const q7_t *src, uint32_t size, uint32_t *index)
Minimum value of the q7 vector.
Definition: hpm_math.h:255
static q63_t hpm_dsp_var_q31(const q31_t *src, uint32_t size)
Variance of the q31 vector.
Definition: hpm_math.h:708
static q31_t hpm_dsp_max_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum value of the q31 vector.
Definition: hpm_math.h:140
static q31_t hpm_dsp_mean_q31(const q31_t *src, uint32_t size)
Mean value of the q31 vector.
Definition: hpm_math.h:337
static q31_t hpm_dsp_absmax_q31(const q31_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q31 vector.
Definition: hpm_math.h:852
static float32_t hpm_dsp_entropy_f32(const float32_t *src, uint32_t size)
Entropy of the floating-point vector.
Definition: hpm_math.h:729
static float32_t hpm_dsp_mean_f32(const float32_t *src, uint32_t size)
Mean value of the floating-point vector.
Definition: hpm_math.h:289
static float32_t hpm_dsp_rms_f32(const float32_t *src, uint32_t size)
RMS of the floating-point vector.
Definition: hpm_math.h:493
static float32_t hpm_dsp_lse_f32(const float32_t *src, uint32_t size)
Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:770
static q31_t hpm_dsp_pwr_q7(const q7_t *src, uint32_t size)
Sum of the squares of the q7 vector.
Definition: hpm_math.h:473
static q31_t hpm_dsp_absmin_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum absolute value of the q31 vector.
Definition: hpm_math.h:894
static float32_t hpm_dsp_max_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum value of the floating-point vector.
Definition: hpm_math.h:95
static float32_t hpm_dsp_absmax_f32(const float32_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the floating-point vector.
Definition: hpm_math.h:824
static q31_t hpm_dsp_min_q31(const q31_t *src, uint32_t size, uint32_t *index)
Minimum value of the q31 vector.
Definition: hpm_math.h:235
static q15_t hpm_dsp_std_u8(const uint8_t *src, uint32_t size)
Standard deviation of the u8 vector.
Definition: hpm_math.h:642
static q15_t hpm_dsp_min_q15(const q15_t *src, uint32_t size, uint32_t *index)
Minimum value of the q15 vector.
Definition: hpm_math.h:215
static q15_t hpm_dsp_rms_q15(const q15_t *src, uint32_t size)
RMS of the q15 vector.
Definition: hpm_math.h:519
static float32_t hpm_dsp_min_f32(const float32_t *src, uint32_t size, uint32_t *index)
Minimum value of the floating-point vector.
Definition: hpm_math.h:195
static q63_t hpm_dsp_pwr_q31(const q31_t *src, uint32_t size)
Sum of the squares of the q31 vector.
Definition: hpm_math.h:448
static float32_t hpm_dsp_max_val_f32(const float32_t *src, uint32_t size)
Definition: hpm_math.h:107
static q31_t hpm_dsp_var_q15(const q15_t *src, uint32_t size)
Variance of the q15 vector.
Definition: hpm_math.h:682
static q7_t hpm_dsp_absmax_q7(const q7_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q7 vector.
Definition: hpm_math.h:866
static q15_t hpm_dsp_mean_q15(const q15_t *src, uint32_t size)
Mean value of the q15 vector.
Definition: hpm_math.h:313
static q15_t hpm_dsp_std_q15(const q15_t *src, uint32_t size)
Standard deviation of the q15 vector.
Definition: hpm_math.h:591
static float32_t hpm_dsp_lse_dprod_f32(const float32_t *src1, const float32_t *src2, uint32_t size, float32_t *buffer)
Dot product with Log-Sum-Exp of the floating-point vector.
Definition: hpm_math.h:790
static q15_t hpm_dsp_absmax_q15(const q15_t *src, uint32_t size, uint32_t *index)
Maximum absolute value of the q15 vector.
Definition: hpm_math.h:838
static uint8_t hpm_dsp_mean_u8(const uint8_t *src, uint32_t size)
Mean value of the u8 vector.
Definition: hpm_math.h:383
static q31_t hpm_dsp_std_q31(const q31_t *src, uint32_t size)
Standard deviation of the q31 vector.
Definition: hpm_math.h:617
static float32_t hpm_dsp_relative_entropy_f32(const float32_t *src1, const float32_t *src2, uint32_t size)
Relative Entropy of the floating-point vector.
Definition: hpm_math.h:752
static float32_t hpm_dsp_pwr_f32(const float32_t *src, uint32_t size)
Sum of the squares of the floating-point vector.
Definition: hpm_math.h:397
static void hpm_dsp_svm_linear_est_f32(const riscv_dsp_svm_linear_f32_t *instance, const float32_t *src, int32_t *result)
SVM linear prediction.
Definition: hpm_math.h:4818
static void hpm_dsp_svm_rbf_est_f32(const riscv_dsp_svm_rbf_f32_t *instance, const float32_t *src, int32_t *result)
SVM rbf prediction.
Definition: hpm_math.h:4846
static void hpm_dsp_svm_poly_est_f32(const riscv_dsp_svm_poly_f32_t *instance, const float32_t *src, int32_t *result)
SVM polynomial prediction.
Definition: hpm_math.h:4860
static void hpm_dsp_svm_sigmoid_est_f32(const riscv_dsp_svm_sigmoid_f32_t *instance, const float32_t *src, int32_t *result)
SVM Sigmoid prediction.
Definition: hpm_math.h:4832
static void hpm_dsp_cifft_q15(q15_t *src, uint32_t m)
cifft of q15 vectors.
Definition: hpm_math.h:5317
static void hpm_dsp_dct4_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5738
static int32_t hpm_dsp_cfft_rd2_f32(float32_t *src, uint32_t m)
cfft_rd2 of f32 vectors.
Definition: hpm_math.h:4916
static void hpm_dsp_cfft_f64(float64_t *src, uint32_t m)
cfft of f64 vectors.
Definition: hpm_math.h:5236
static void hpm_dsp_idct4_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5757
static void hpm_dsp_cifft_f32(float32_t *src, uint32_t m)
cifft of f32 vectors.
Definition: hpm_math.h:5253
static int32_t hpm_dsp_cifft_rd4_q15(q15_t *src, uint32_t m)
cifft_rd4 of q15 vectors.
Definition: hpm_math.h:5140
static int32_t hpm_dsp_rifft_f32(float32_t *src, uint32_t m)
rifft of f32 vectors.
Definition: hpm_math.h:5435
static int32_t hpm_dsp_cfft_rd4_q31(q31_t *src, uint32_t m)
cfft_rd4 of q31 vectors.
Definition: hpm_math.h:5164
static int32_t hpm_dsp_rifft_q15(q15_t *src, uint32_t m)
rifft of q15 vectors.
Definition: hpm_math.h:5489
static int32_t hpm_dsp_cifft_rd2_q31(q31_t *src, uint32_t m)
cfft_rd2 of q31 vectors.
Definition: hpm_math.h:5032
static void hpm_dsp_cfft_q15(q15_t *src, uint32_t m)
cfft of q15 vectors.
Definition: hpm_math.h:5294
static void hpm_dsp_cfft_f32(float32_t *src, uint32_t m)
cfft of f32 vectors.
Definition: hpm_math.h:5219
static int32_t hpm_dsp_rifft_q31(q31_t *src, uint32_t m)
rifft of q31 vectors.
Definition: hpm_math.h:5529
static void hpm_dsp_dct_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5624
static void hpm_dsp_idct_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5605
static void hpm_dsp_idct_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5567
static void hpm_dsp_idct4_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5681
static int32_t hpm_dsp_rfft_f64(float64_t *src, uint32_t m)
rfft of f64 vectors.
Definition: hpm_math.h:5421
static int32_t hpm_dsp_cfft_rd4_q15(q15_t *src, uint32_t m)
cfft_rd4 of q15 vectors.
Definition: hpm_math.h:5116
static void hpm_dsp_cifft_q31(q31_t *src, uint32_t m)
cifft of q31 vectors.
Definition: hpm_math.h:5363
static int32_t hpm_dsp_cifft_rd4_f32(float32_t *src, uint32_t m)
cifft_rd4 of f32 vectors.
Definition: hpm_math.h:5092
static void hpm_dsp_dct_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5554
static void hpm_dsp_cifft_f64(float64_t *src, uint32_t m)
cifft of f64 vectors.
Definition: hpm_math.h:5270
static void hpm_dsp_cfft_q31(q31_t *src, uint32_t m)
cfft of q31 vectors.
Definition: hpm_math.h:5340
static int32_t hpm_dsp_cifft_rd2_f32(float32_t *src, uint32_t m)
cifft_rd2 of f32 vectors.
Definition: hpm_math.h:4934
static int32_t hpm_dsp_cfft_rd2_q31(q31_t *src, uint32_t m)
cfft_rd2 of q31 vectors.
Definition: hpm_math.h:5007
static void hpm_dsp_idct_q31(q31_t *src, uint32_t m)
Definition: hpm_math.h:5643
static void hpm_dsp_dct_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5586
static int32_t hpm_dsp_rfft_q31(q31_t *src, uint32_t m)
rfft of q31 vectors.
Definition: hpm_math.h:5509
static int32_t hpm_dsp_rfft_q15(q15_t *src, uint32_t m)
rfft of q15 vectors.
Definition: hpm_math.h:5469
static void hpm_dsp_idct4_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5719
static void hpm_dsp_dct4_q15(q15_t *src, uint32_t m)
Definition: hpm_math.h:5700
static int32_t hpm_dsp_rifft_f64(float64_t *src, uint32_t m)
rifft of f64 vectors.
Definition: hpm_math.h:5449
static int32_t hpm_dsp_cfft_rd2_q15(q15_t *src, uint32_t m)
cfft_rd2 of q15 vectors.
Definition: hpm_math.h:4959
static int32_t hpm_dsp_cifft_rd4_q31(q31_t *src, uint32_t m)
cifft_rd4 of q31 vectors.
Definition: hpm_math.h:5188
void hpm_software_cfft_float(float *src, uint32_t m)
Software implementation does not depend on any hardware.
static int32_t hpm_dsp_cfft_rd4_f32(float32_t *src, uint32_t m)
cfft_rd4 of f32 vectors.
Definition: hpm_math.h:5073
static int32_t hpm_dsp_cifft_rd2_q15(q15_t *src, uint32_t m)
cifft_rd2 of q15 vectors.
Definition: hpm_math.h:4983
static int32_t hpm_dsp_rfft_f32(float32_t *src, uint32_t m)
rfft of f32 vectors.
Definition: hpm_math.h:5403
static void hpm_dsp_dct4_f32(float32_t *src, uint32_t m)
Definition: hpm_math.h:5668
static void hpm_dsp_dup_f32(float32_t *src, float32_t *dst, uint32_t size)
Duplicate the floating-point vector.
Definition: hpm_math.h:6225
static void hpm_dsp_set_f32(float32_t val, float32_t *dst, uint32_t size)
Set the floating-point vector.
Definition: hpm_math.h:6278
static float32_t hpm_dsp_atan2_f32(float32_t srcy, float32_t src2)
Definition: hpm_math.h:5997
static void hpm_dsp_convert_q31_q15(q31_t *src, q15_t *dst, uint32_t size)
Convert a Q31 vector to Q15.
Definition: hpm_math.h:6159
static void hpm_dsp_set_q15(q15_t val, q15_t *dst, uint32_t size)
Set the Q15 vector.
Definition: hpm_math.h:6291
static float32_t hpm_dsp_exp_f32(float32_t src)
Calculate the exponential value of an f32 input.
Definition: hpm_math.h:6360
static float32_t hpm_dsp_sin_f32(float32_t src)
Definition: hpm_math.h:5945
static float32_t hpm_dsp_sigmoid_f32(float32_t src)
Calculate the sigmoid value of an f32 input.
Definition: hpm_math.h:6386
static void hpm_dsp_convert_q31_f32(q31_t *src, float32_t *dst, uint32_t size)
Convert a Q31 vector to floating-point.
Definition: hpm_math.h:6142
static q15_t hpm_dsp_atan_q15(q15_t src)
Definition: hpm_math.h:5991
static q31_t hpm_dsp_sin_q31(q31_t src)
Definition: hpm_math.h:5965
static void hpm_dsp_convert_f32_q31(float32_t *src, q31_t *dst, uint32_t size)
Convert a floating-point vector to Q31.
Definition: hpm_math.h:6073
static q31_t hpm_dsp_cos_q31(q31_t src)
Definition: hpm_math.h:5932
static q31_t hpm_dsp_atan2_q31(q31_t srcy, q31_t src2)
Definition: hpm_math.h:6009
static void hpm_dsp_dup_q31(q31_t *src, q31_t *dst, uint32_t size)
Duplicate the Q31 vector.
Definition: hpm_math.h:6251
static void hpm_dsp_convert_q15_q7(q15_t *src, q7_t *dst, uint32_t size)
Convert a Q15 vector to Q7.
Definition: hpm_math.h:6129
static void hpm_dsp_set_q31(q31_t val, q31_t *dst, uint32_t size)
Set the Q31 vector.
Definition: hpm_math.h:6304
static q15_t hpm_dsp_sin_q15(q15_t src)
Definition: hpm_math.h:5971
static void hpm_dsp_dup_q15(q15_t *src, q15_t *dst, uint32_t size)
Duplicate the Q15 vector.
Definition: hpm_math.h:6238
static void hpm_dsp_convert_q15_q31(q15_t *src, q31_t *dst, uint32_t size)
Convert a Q15 vector to Q31.
Definition: hpm_math.h:6116
static q15_t hpm_dsp_cos_q15(q15_t src)
Definition: hpm_math.h:5938
static float32_t hpm_dsp_cos_f32(float32_t src)
Definition: hpm_math.h:5926
static void hpm_dsp_barycenter_f32(const float32_t *src, const float32_t *weights, float32_t *out, uint32_t numofvec, uint32_t dimofvec)
Barycenter of the floating-point type.
Definition: hpm_math.h:6348
static q15_t hpm_dsp_atan2_q15(q15_t srcy, q15_t src2)
Definition: hpm_math.h:6003
static void hpm_dsp_convert_q7_q15(q7_t *src, q15_t *dst, uint32_t size)
Convert a Q7 vector to Q15.
Definition: hpm_math.h:6198
static void hpm_dsp_convert_q31_q7(q31_t *src, q7_t *dst, uint32_t size)
Convert a Q31 vector to Q7.
Definition: hpm_math.h:6172
static void hpm_dsp_convert_f32_q15(float32_t *src, q15_t *dst, uint32_t size)
Convert a floating-point vector to Q15.
Definition: hpm_math.h:6060
static void hpm_dsp_set_q7(q7_t val, q7_t *dst, uint32_t size)
Set the Q7 vector.
Definition: hpm_math.h:6317
static q31_t hpm_dsp_atan_q31(q31_t src)
Definition: hpm_math.h:5985
static void hpm_dsp_convert_q7_q31(q7_t *src, q31_t *dst, uint32_t size)
Convert a Q7 vector to Q31.
Definition: hpm_math.h:6211
static void hpm_dsp_convert_q7_f32(q7_t *src, float32_t *dst, uint32_t size)
Convert a Q7 vector to floating-point.
Definition: hpm_math.h:6185
static float32_t hpm_dsp_log_f32(float32_t src)
Calculate the natural logarithm value of an f32 input.
Definition: hpm_math.h:6412
static void hpm_dsp_convert_q15_f32(q15_t *src, float32_t *dst, uint32_t size)
Convert a Q15 vector to floating-point.
Definition: hpm_math.h:6103
static float32_t hpm_dsp_weighted_sum_f32(const float32_t *src, const float32_t *weight, uint32_t size)
Weighted sum of the floating-point vector.
Definition: hpm_math.h:6332
static void hpm_dsp_convert_f32_q7(float32_t *src, q7_t *dst, uint32_t size)
Convert a floating-point vector to Q7.
Definition: hpm_math.h:6090
static q31_t hpm_dsp_sqrt_q31(q31_t src)
Square root of the q31 input.
Definition: hpm_math.h:6034
static float32_t hpm_dsp_sqrt_f32(float32_t src)
Square root of the floating-point input.
Definition: hpm_math.h:6022
static void hpm_dsp_dup_q7(q7_t *src, q7_t *dst, uint32_t size)
Duplicate the Q7 vector.
Definition: hpm_math.h:6264
static q15_t hpm_dsp_sqrt_q15(q15_t src)
Square root of the q15 input.
Definition: hpm_math.h:6046
static float32_t hpm_dsp_atan_f32(float32_t src)
Definition: hpm_math.h:5979
uint32_t hpm_math_sw_reverse_bit32_msb_to_lsb(uint32_t msb)
Reverse 32-bit data msb to lsb.
uint8_t hpm_math_sw_reverse_bit8_msb_to_lsb(uint8_t msb)
Reverse 8-bit data msb to lsb.
uint32_t hpm_math_sw_reverse_bit32_lsb_to_msb(uint32_t lsb)
Reverse 32-bit data lsb to msb.
uint8_t hpm_math_sw_reverse_bit8_lsb_to_msb(uint8_t lsb)
Reverse 8-bit data lsb to msb.
FFT transform context.
Definition: hpm_ffa_drv.h:75
void * dst
Definition: hpm_ffa_drv.h:81
uint8_t dst_data_type
Definition: hpm_ffa_drv.h:78
uint8_t src_data_type
Definition: hpm_ffa_drv.h:77
const void * src
Definition: hpm_ffa_drv.h:80
uint32_t num_points
Definition: hpm_ffa_drv.h:79
uint16_t is_ifft
Definition: hpm_ffa_drv.h:76