arm_rfft_q15.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 31. July 2014
  5. * $Revision: V1.4.4
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_rfft_q15.c
  9. *
  10. * Description: RFFT & RIFFT Q15 process function
  11. *
  12. *
  13. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * - Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in
  22. * the documentation and/or other materials provided with the
  23. * distribution.
  24. * - Neither the name of ARM LIMITED nor the names of its contributors
  25. * may be used to endorse or promote products derived from this
  26. * software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39. * POSSIBILITY OF SUCH DAMAGE.
  40. * -------------------------------------------------------------------- */
  41. #include "arm_math.h"
  42. /*--------------------------------------------------------------------
  43. * Internal functions prototypes
  44. --------------------------------------------------------------------*/
  45. void arm_split_rfft_q15(
  46. q15_t * pSrc,
  47. uint32_t fftLen,
  48. q15_t * pATable,
  49. q15_t * pBTable,
  50. q15_t * pDst,
  51. uint32_t modifier);
  52. void arm_split_rifft_q15(
  53. q15_t * pSrc,
  54. uint32_t fftLen,
  55. q15_t * pATable,
  56. q15_t * pBTable,
  57. q15_t * pDst,
  58. uint32_t modifier);
  59. /**
  60. * @addtogroup RealFFT
  61. * @{
  62. */
  63. /**
  64. * @brief Processing function for the Q15 RFFT/RIFFT.
  65. * @param[in] *S points to an instance of the Q15 RFFT/RIFFT structure.
  66. * @param[in] *pSrc points to the input buffer.
  67. * @param[out] *pDst points to the output buffer.
  68. * @return none.
  69. *
  70. * \par Input an output formats:
  71. * \par
  72. * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
  73. * Hence the output format is different for different RFFT sizes.
  74. * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
  75. * \par
  76. * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
  77. * \par
  78. * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
  79. */
  80. void arm_rfft_q15(
  81. const arm_rfft_instance_q15 * S,
  82. q15_t * pSrc,
  83. q15_t * pDst)
  84. {
  85. const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
  86. uint32_t i;
  87. uint32_t L2 = S->fftLenReal >> 1;
  88. /* Calculation of RIFFT of input */
  89. if(S->ifftFlagR == 1u)
  90. {
  91. /* Real IFFT core process */
  92. arm_split_rifft_q15(pSrc, L2, S->pTwiddleAReal,
  93. S->pTwiddleBReal, pDst, S->twidCoefRModifier);
  94. /* Complex IFFT process */
  95. arm_cfft_q15(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
  96. for(i=0;i<S->fftLenReal;i++)
  97. {
  98. pDst[i] = pDst[i] << 1;
  99. }
  100. }
  101. else
  102. {
  103. /* Calculation of RFFT of input */
  104. /* Complex FFT process */
  105. arm_cfft_q15(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
  106. /* Real FFT core process */
  107. arm_split_rfft_q15(pSrc, L2, S->pTwiddleAReal,
  108. S->pTwiddleBReal, pDst, S->twidCoefRModifier);
  109. }
  110. }
  111. /**
  112. * @} end of RealFFT group
  113. */
  114. /**
  115. * @brief Core Real FFT process
  116. * @param *pSrc points to the input buffer.
  117. * @param fftLen length of FFT.
  118. * @param *pATable points to the A twiddle Coef buffer.
  119. * @param *pBTable points to the B twiddle Coef buffer.
  120. * @param *pDst points to the output buffer.
  121. * @param modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  122. * @return none.
  123. * The function implements a Real FFT
  124. */
  125. void arm_split_rfft_q15(
  126. q15_t * pSrc,
  127. uint32_t fftLen,
  128. q15_t * pATable,
  129. q15_t * pBTable,
  130. q15_t * pDst,
  131. uint32_t modifier)
  132. {
  133. uint32_t i; /* Loop Counter */
  134. q31_t outR, outI; /* Temporary variables for output */
  135. q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
  136. q15_t *pSrc1, *pSrc2;
  137. #ifndef ARM_MATH_CM0_FAMILY
  138. q15_t *pD1, *pD2;
  139. #endif
  140. // pSrc[2u * fftLen] = pSrc[0];
  141. // pSrc[(2u * fftLen) + 1u] = pSrc[1];
  142. pCoefA = &pATable[modifier * 2u];
  143. pCoefB = &pBTable[modifier * 2u];
  144. pSrc1 = &pSrc[2];
  145. pSrc2 = &pSrc[(2u * fftLen) - 2u];
  146. #ifndef ARM_MATH_CM0_FAMILY
  147. /* Run the below code for Cortex-M4 and Cortex-M3 */
  148. i = 1u;
  149. pD1 = pDst + 2;
  150. pD2 = pDst + (4u * fftLen) - 2;
  151. for(i = fftLen - 1; i > 0; i--)
  152. {
  153. /*
  154. outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
  155. + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
  156. pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
  157. */
  158. /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
  159. pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
  160. pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
  161. #ifndef ARM_MATH_BIG_ENDIAN
  162. /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
  163. outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
  164. #else
  165. /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
  166. outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
  167. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  168. /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
  169. pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
  170. outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 16u;
  171. /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
  172. pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
  173. #ifndef ARM_MATH_BIG_ENDIAN
  174. outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
  175. #else
  176. outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
  177. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  178. /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
  179. outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
  180. /* write output */
  181. *pD1++ = (q15_t) outR;
  182. *pD1++ = outI >> 16u;
  183. /* write complex conjugate output */
  184. pD2[0] = (q15_t) outR;
  185. pD2[1] = -(outI >> 16u);
  186. pD2 -= 2;
  187. /* update coefficient pointer */
  188. pCoefB = pCoefB + (2u * modifier);
  189. pCoefA = pCoefA + (2u * modifier);
  190. }
  191. pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
  192. pDst[(2u * fftLen) + 1u] = 0;
  193. pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
  194. pDst[1] = 0;
  195. #else
  196. /* Run the below code for Cortex-M0 */
  197. i = 1u;
  198. while(i < fftLen)
  199. {
  200. /*
  201. outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
  202. + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
  203. pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
  204. */
  205. outR = *pSrc1 * *pCoefA;
  206. outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
  207. outR = outR + (*pSrc2 * *pCoefB);
  208. outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
  209. /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
  210. pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
  211. pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
  212. */
  213. outI = *pSrc2 * *(pCoefB + 1);
  214. outI = outI - (*(pSrc2 + 1) * *pCoefB);
  215. outI = outI + (*(pSrc1 + 1) * *pCoefA);
  216. outI = outI + (*pSrc1 * *(pCoefA + 1));
  217. /* update input pointers */
  218. pSrc1 += 2u;
  219. pSrc2 -= 2u;
  220. /* write output */
  221. pDst[2u * i] = (q15_t) outR;
  222. pDst[(2u * i) + 1u] = outI >> 16u;
  223. /* write complex conjugate output */
  224. pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
  225. pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 16u);
  226. /* update coefficient pointer */
  227. pCoefB = pCoefB + (2u * modifier);
  228. pCoefA = pCoefA + (2u * modifier);
  229. i++;
  230. }
  231. pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
  232. pDst[(2u * fftLen) + 1u] = 0;
  233. pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
  234. pDst[1] = 0;
  235. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  236. }
  237. /**
  238. * @brief Core Real IFFT process
  239. * @param[in] *pSrc points to the input buffer.
  240. * @param[in] fftLen length of FFT.
  241. * @param[in] *pATable points to the twiddle Coef A buffer.
  242. * @param[in] *pBTable points to the twiddle Coef B buffer.
  243. * @param[out] *pDst points to the output buffer.
  244. * @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  245. * @return none.
  246. * The function implements a Real IFFT
  247. */
  248. void arm_split_rifft_q15(
  249. q15_t * pSrc,
  250. uint32_t fftLen,
  251. q15_t * pATable,
  252. q15_t * pBTable,
  253. q15_t * pDst,
  254. uint32_t modifier)
  255. {
  256. uint32_t i; /* Loop Counter */
  257. q31_t outR, outI; /* Temporary variables for output */
  258. q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
  259. q15_t *pSrc1, *pSrc2;
  260. q15_t *pDst1 = &pDst[0];
  261. pCoefA = &pATable[0];
  262. pCoefB = &pBTable[0];
  263. pSrc1 = &pSrc[0];
  264. pSrc2 = &pSrc[2u * fftLen];
  265. #ifndef ARM_MATH_CM0_FAMILY
  266. /* Run the below code for Cortex-M4 and Cortex-M3 */
  267. i = fftLen;
  268. while(i > 0u)
  269. {
  270. /*
  271. outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
  272. pIn[2 * n - 2 * i] * pBTable[2 * i] -
  273. pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
  274. outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
  275. pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
  276. pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
  277. */
  278. #ifndef ARM_MATH_BIG_ENDIAN
  279. /* pIn[2 * n - 2 * i] * pBTable[2 * i] -
  280. pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
  281. outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
  282. #else
  283. /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
  284. pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
  285. outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
  286. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  287. /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
  288. pIn[2 * n - 2 * i] * pBTable[2 * i] */
  289. outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 16u;
  290. /*
  291. -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
  292. pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
  293. outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
  294. /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
  295. #ifndef ARM_MATH_BIG_ENDIAN
  296. outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
  297. #else
  298. outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
  299. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  300. /* write output */
  301. #ifndef ARM_MATH_BIG_ENDIAN
  302. *__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 16u), 16);
  303. #else
  304. *__SIMD32(pDst1)++ = __PKHBT((outI >> 16u), outR, 16);
  305. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  306. /* update coefficient pointer */
  307. pCoefB = pCoefB + (2u * modifier);
  308. pCoefA = pCoefA + (2u * modifier);
  309. i--;
  310. }
  311. #else
  312. /* Run the below code for Cortex-M0 */
  313. i = fftLen;
  314. while(i > 0u)
  315. {
  316. /*
  317. outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
  318. pIn[2 * n - 2 * i] * pBTable[2 * i] -
  319. pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
  320. */
  321. outR = *pSrc2 * *pCoefB;
  322. outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
  323. outR = outR + (*pSrc1 * *pCoefA);
  324. outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
  325. /*
  326. outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
  327. pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
  328. pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
  329. */
  330. outI = *(pSrc1 + 1) * *pCoefA;
  331. outI = outI - (*pSrc1 * *(pCoefA + 1));
  332. outI = outI - (*pSrc2 * *(pCoefB + 1));
  333. outI = outI - (*(pSrc2 + 1) * *(pCoefB));
  334. /* update input pointers */
  335. pSrc1 += 2u;
  336. pSrc2 -= 2u;
  337. /* write output */
  338. *pDst1++ = (q15_t) outR;
  339. *pDst1++ = (q15_t) (outI >> 16);
  340. /* update coefficient pointer */
  341. pCoefB = pCoefB + (2u * modifier);
  342. pCoefA = pCoefA + (2u * modifier);
  343. i--;
  344. }
  345. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  346. }