arm_nnsupportfunctions.h 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /*
  2. * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_nnsupportfunctions.h
  21. * Description: Public header file of support functions for CMSIS NN Library
  22. *
  23. * $Date: 13. July 2018
  24. * $Revision: V.1.0.0
  25. *
  26. * Target Processor: Cortex-M cores
  27. * -------------------------------------------------------------------- */
  28. #ifndef _ARM_NNSUPPORTFUNCTIONS_H_
  29. #define _ARM_NNSUPPORTFUNCTIONS_H_
  30. #include "arm_math.h"
  31. #include "arm_common_tables.h"
  32. //#include <cstring>
  33. #ifdef __cplusplus
  34. extern "C"
  35. {
  36. #endif
  37. /**
  38. * @brief Union for SIMD access of Q31/Q15/Q7 types
  39. */
  40. union arm_nnword
  41. {
  42. q31_t word;
  43. /**< Q31 type */
  44. q15_t half_words[2];
  45. /**< Q15 type */
  46. q7_t bytes[4];
  47. /**< Q7 type */
  48. };
  /**
   * @brief Enumeration used to select the activation function type
   */
  typedef enum
  {
      ARM_SIGMOID = 0,    /**< Sigmoid activation function */
      ARM_TANH = 1,       /**< Tanh activation function */
  } arm_nn_activation_type;
  60. /**
  61. * @defgroup nndata_convert Neural Network Data Conversion Functions
  62. *
  63. * Perform data type conversion in-between neural network operations
  64. *
  65. */
  66. /**
  67. * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
  68. * @param[in] *pSrc points to the Q7 input vector
  69. * @param[out] *pDst points to the Q15 output vector
  70. * @param[in] blockSize length of the input vector
  71. * @return none.
  72. *
  73. */
  74. void arm_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
  75. /**
  76. * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
  77. * @param[in] *pSrc points to the Q7 input vector
  78. * @param[out] *pDst points to the Q15 output vector
  79. * @param[in] blockSize length of the input vector
  80. * @return none.
  81. *
  82. */
  83. void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
  84. #if defined (ARM_MATH_DSP)
  85. /**
  86. * @brief read and expand one Q7 word into two Q15 words
  87. */
  88. __STATIC_FORCEINLINE void *read_and_pad(void *source, q31_t * out1, q31_t * out2)
  89. {
  90. q31_t inA = *__SIMD32(source)++;
  91. q31_t inAbuf1 = __SXTB16(__ROR(inA, 8));
  92. q31_t inAbuf2 = __SXTB16(inA);
  93. #ifndef ARM_MATH_BIG_ENDIAN
  94. *out2 = __PKHTB(inAbuf1, inAbuf2, 16);
  95. *out1 = __PKHBT(inAbuf2, inAbuf1, 16);
  96. #else
  97. *out1 = __PKHTB(inAbuf1, inAbuf2, 16);
  98. *out2 = __PKHBT(inAbuf2, inAbuf1, 16);
  99. #endif
  100. return source;
  101. }
  102. /**
  103. * @brief read and expand one Q7 word into two Q15 words with reordering
  104. */
  105. __STATIC_FORCEINLINE void *read_and_pad_reordered(void *source, q31_t * out1, q31_t * out2)
  106. {
  107. q31_t inA = *__SIMD32(source)++;
  108. #ifndef ARM_MATH_BIG_ENDIAN
  109. *out2 = __SXTB16(__ROR(inA, 8));
  110. *out1 = __SXTB16(inA);
  111. #else
  112. *out1 = __SXTB16(__ROR(inA, 8));
  113. *out2 = __SXTB16(inA);
  114. #endif
  115. return source;
  116. }
  117. #endif
  118. /**
  119. * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
  120. *
  121. * Basic Math Functions for Neural Network Computation
  122. *
  123. */
  124. /**
  125. * @brief Q7 vector multiplication with variable output shifts
  126. * @param[in] *pSrcA pointer to the first input vector
  127. * @param[in] *pSrcB pointer to the second input vector
  128. * @param[out] *pDst pointer to the output vector
  129. * @param[in] out_shift amount of right-shift for output
  130. * @param[in] blockSize number of samples in each vector
  131. * @return none.
  132. *
  133. * <b>Scaling and Overflow Behavior:</b>
  134. * \par
  135. * The function uses saturating arithmetic.
  136. * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
  137. */
  138. void arm_nn_mult_q15(
  139. q15_t * pSrcA,
  140. q15_t * pSrcB,
  141. q15_t * pDst,
  142. const uint16_t out_shift,
  143. uint32_t blockSize);
  144. /**
  145. * @brief Q7 vector multiplication with variable output shifts
  146. * @param[in] *pSrcA pointer to the first input vector
  147. * @param[in] *pSrcB pointer to the second input vector
  148. * @param[out] *pDst pointer to the output vector
  149. * @param[in] out_shift amount of right-shift for output
  150. * @param[in] blockSize number of samples in each vector
  151. * @return none.
  152. *
  153. * <b>Scaling and Overflow Behavior:</b>
  154. * \par
  155. * The function uses saturating arithmetic.
  156. * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
  157. */
  158. void arm_nn_mult_q7(
  159. q7_t * pSrcA,
  160. q7_t * pSrcB,
  161. q7_t * pDst,
  162. const uint16_t out_shift,
  163. uint32_t blockSize);
  /**
   * @brief Definition of the rounding offset for a right shift by out_shift
   *
   * Expands to 2^(out_shift - 1), i.e. half of the weight of the lowest bit
   * that survives a right shift by out_shift. It is intended to be added to a
   * value before that value is right-shifted (the call sites are outside this
   * header). Expands to 0 when ARM_NN_TRUNCATE is defined.
   *
   * The offset is computed as (1 << out_shift) >> 1 rather than
   * 1 << (out_shift - 1) so that out_shift == 0 yields 0 instead of a shift
   * by a negative count, which is undefined behaviour in C. The shift is done
   * on an unsigned constant so that out_shift == 31 stays well defined, and
   * the result is cast back to int to keep the type the macro always had.
   * The argument is parenthesized so that expressions such as a & b or
   * c ? x : y can be passed safely; it is evaluated exactly once.
   *
   * @param out_shift right-shift amount, expected to be in the range 0..31
   */
  #ifndef ARM_NN_TRUNCATE
  #define NN_ROUND(out_shift) ( (int)((0x1u << (out_shift)) >> 1) )
  #else
  #define NN_ROUND(out_shift) 0
  #endif
  172. #ifdef __cplusplus
  173. }
  174. #endif
  175. #endif