arm_cfft_radix4_f32.c 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix4_f32.c
  4. * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 27. January 2017
  7. * $Revision: V.1.5.1
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "arm_math.h"
  29. extern void arm_bitreversal_f32(
  30. float32_t * pSrc,
  31. uint16_t fftSize,
  32. uint16_t bitRevFactor,
  33. uint16_t * pBitRevTab);
  34. void arm_radix4_butterfly_f32(
  35. float32_t * pSrc,
  36. uint16_t fftLen,
  37. float32_t * pCoef,
  38. uint16_t twidCoefModifier);
  39. void arm_radix4_butterfly_inverse_f32(
  40. float32_t * pSrc,
  41. uint16_t fftLen,
  42. float32_t * pCoef,
  43. uint16_t twidCoefModifier,
  44. float32_t onebyfftLen);
  45. /**
  46. * @ingroup groupTransforms
  47. */
  48. /**
  49. * @addtogroup ComplexFFT
  50. * @{
  51. */
  52. /**
  53. * @details
  54. * @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
  55. * @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed
  56. * in the future.
  57. * @param[in] *S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure.
  58. * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
  59. * @return none.
  60. */
  61. void arm_cfft_radix4_f32(
  62. const arm_cfft_radix4_instance_f32 * S,
  63. float32_t * pSrc)
  64. {
  65. if (S->ifftFlag == 1U)
  66. {
  67. /* Complex IFFT radix-4 */
  68. arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
  69. }
  70. else
  71. {
  72. /* Complex FFT radix-4 */
  73. arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  74. }
  75. if (S->bitReverseFlag == 1U)
  76. {
  77. /* Bit Reversal */
  78. arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  79. }
  80. }
  81. /**
  82. * @} end of ComplexFFT group
  83. */
  84. /* ----------------------------------------------------------------------
  85. * Internal helper function used by the FFTs
  86. * ---------------------------------------------------------------------- */
  87. /*
  88. * @brief Core function for the floating-point CFFT butterfly process.
  89. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  90. * @param[in] fftLen length of the FFT.
  91. * @param[in] *pCoef points to the twiddle coefficient buffer.
  92. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  93. * @return none.
  94. */
  95. void arm_radix4_butterfly_f32(
  96. float32_t * pSrc,
  97. uint16_t fftLen,
  98. float32_t * pCoef,
  99. uint16_t twidCoefModifier)
  100. {
  101. float32_t co1, co2, co3, si1, si2, si3;
  102. uint32_t ia1, ia2, ia3;
  103. uint32_t i0, i1, i2, i3;
  104. uint32_t n1, n2, j, k;
  105. #if defined (ARM_MATH_DSP)
  106. /* Run the below code for Cortex-M4 and Cortex-M3 */
  107. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  108. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  109. Ybminusd;
  110. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  111. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  112. float32_t *ptr1;
  113. float32_t p0,p1,p2,p3,p4,p5;
  114. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  115. /* Initializations for the first stage */
  116. n2 = fftLen;
  117. n1 = n2;
  118. /* n2 = fftLen/4 */
  119. n2 >>= 2U;
  120. i0 = 0U;
  121. ia1 = 0U;
  122. j = n2;
  123. /* Calculation of first stage */
  124. do
  125. {
  126. /* index calculation for the input as, */
  127. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  128. i1 = i0 + n2;
  129. i2 = i1 + n2;
  130. i3 = i2 + n2;
  131. xaIn = pSrc[(2U * i0)];
  132. yaIn = pSrc[(2U * i0) + 1U];
  133. xbIn = pSrc[(2U * i1)];
  134. ybIn = pSrc[(2U * i1) + 1U];
  135. xcIn = pSrc[(2U * i2)];
  136. ycIn = pSrc[(2U * i2) + 1U];
  137. xdIn = pSrc[(2U * i3)];
  138. ydIn = pSrc[(2U * i3) + 1U];
  139. /* xa + xc */
  140. Xaplusc = xaIn + xcIn;
  141. /* xb + xd */
  142. Xbplusd = xbIn + xdIn;
  143. /* ya + yc */
  144. Yaplusc = yaIn + ycIn;
  145. /* yb + yd */
  146. Ybplusd = ybIn + ydIn;
  147. /* index calculation for the coefficients */
  148. ia2 = ia1 + ia1;
  149. co2 = pCoef[ia2 * 2U];
  150. si2 = pCoef[(ia2 * 2U) + 1U];
  151. /* xa - xc */
  152. Xaminusc = xaIn - xcIn;
  153. /* xb - xd */
  154. Xbminusd = xbIn - xdIn;
  155. /* ya - yc */
  156. Yaminusc = yaIn - ycIn;
  157. /* yb - yd */
  158. Ybminusd = ybIn - ydIn;
  159. /* xa' = xa + xb + xc + xd */
  160. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  161. /* ya' = ya + yb + yc + yd */
  162. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  163. /* (xa - xc) + (yb - yd) */
  164. Xb12C_out = (Xaminusc + Ybminusd);
  165. /* (ya - yc) + (xb - xd) */
  166. Yb12C_out = (Yaminusc - Xbminusd);
  167. /* (xa + xc) - (xb + xd) */
  168. Xc12C_out = (Xaplusc - Xbplusd);
  169. /* (ya + yc) - (yb + yd) */
  170. Yc12C_out = (Yaplusc - Ybplusd);
  171. /* (xa - xc) - (yb - yd) */
  172. Xd12C_out = (Xaminusc - Ybminusd);
  173. /* (ya - yc) + (xb - xd) */
  174. Yd12C_out = (Xbminusd + Yaminusc);
  175. co1 = pCoef[ia1 * 2U];
  176. si1 = pCoef[(ia1 * 2U) + 1U];
  177. /* index calculation for the coefficients */
  178. ia3 = ia2 + ia1;
  179. co3 = pCoef[ia3 * 2U];
  180. si3 = pCoef[(ia3 * 2U) + 1U];
  181. Xb12_out = Xb12C_out * co1;
  182. Yb12_out = Yb12C_out * co1;
  183. Xc12_out = Xc12C_out * co2;
  184. Yc12_out = Yc12C_out * co2;
  185. Xd12_out = Xd12C_out * co3;
  186. Yd12_out = Yd12C_out * co3;
  187. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  188. //Xb12_out -= Yb12C_out * si1;
  189. p0 = Yb12C_out * si1;
  190. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  191. //Yb12_out += Xb12C_out * si1;
  192. p1 = Xb12C_out * si1;
  193. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  194. //Xc12_out -= Yc12C_out * si2;
  195. p2 = Yc12C_out * si2;
  196. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  197. //Yc12_out += Xc12C_out * si2;
  198. p3 = Xc12C_out * si2;
  199. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  200. //Xd12_out -= Yd12C_out * si3;
  201. p4 = Yd12C_out * si3;
  202. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  203. //Yd12_out += Xd12C_out * si3;
  204. p5 = Xd12C_out * si3;
  205. Xb12_out += p0;
  206. Yb12_out -= p1;
  207. Xc12_out += p2;
  208. Yc12_out -= p3;
  209. Xd12_out += p4;
  210. Yd12_out -= p5;
  211. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  212. pSrc[2U * i1] = Xc12_out;
  213. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  214. pSrc[(2U * i1) + 1U] = Yc12_out;
  215. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  216. pSrc[2U * i2] = Xb12_out;
  217. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  218. pSrc[(2U * i2) + 1U] = Yb12_out;
  219. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  220. pSrc[2U * i3] = Xd12_out;
  221. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  222. pSrc[(2U * i3) + 1U] = Yd12_out;
  223. /* Twiddle coefficients index modifier */
  224. ia1 += twidCoefModifier;
  225. /* Updating input index */
  226. i0++;
  227. }
  228. while (--j);
  229. twidCoefModifier <<= 2U;
  230. /* Calculation of second stage to excluding last stage */
  231. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  232. {
  233. /* Initializations for the first stage */
  234. n1 = n2;
  235. n2 >>= 2U;
  236. ia1 = 0U;
  237. /* Calculation of first stage */
  238. j = 0;
  239. do
  240. {
  241. /* index calculation for the coefficients */
  242. ia2 = ia1 + ia1;
  243. ia3 = ia2 + ia1;
  244. co1 = pCoef[ia1 * 2U];
  245. si1 = pCoef[(ia1 * 2U) + 1U];
  246. co2 = pCoef[ia2 * 2U];
  247. si2 = pCoef[(ia2 * 2U) + 1U];
  248. co3 = pCoef[ia3 * 2U];
  249. si3 = pCoef[(ia3 * 2U) + 1U];
  250. /* Twiddle coefficients index modifier */
  251. ia1 += twidCoefModifier;
  252. i0 = j;
  253. do
  254. {
  255. /* index calculation for the input as, */
  256. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  257. i1 = i0 + n2;
  258. i2 = i1 + n2;
  259. i3 = i2 + n2;
  260. xaIn = pSrc[(2U * i0)];
  261. yaIn = pSrc[(2U * i0) + 1U];
  262. xbIn = pSrc[(2U * i1)];
  263. ybIn = pSrc[(2U * i1) + 1U];
  264. xcIn = pSrc[(2U * i2)];
  265. ycIn = pSrc[(2U * i2) + 1U];
  266. xdIn = pSrc[(2U * i3)];
  267. ydIn = pSrc[(2U * i3) + 1U];
  268. /* xa - xc */
  269. Xaminusc = xaIn - xcIn;
  270. /* (xb - xd) */
  271. Xbminusd = xbIn - xdIn;
  272. /* ya - yc */
  273. Yaminusc = yaIn - ycIn;
  274. /* (yb - yd) */
  275. Ybminusd = ybIn - ydIn;
  276. /* xa + xc */
  277. Xaplusc = xaIn + xcIn;
  278. /* xb + xd */
  279. Xbplusd = xbIn + xdIn;
  280. /* ya + yc */
  281. Yaplusc = yaIn + ycIn;
  282. /* yb + yd */
  283. Ybplusd = ybIn + ydIn;
  284. /* (xa - xc) + (yb - yd) */
  285. Xb12C_out = (Xaminusc + Ybminusd);
  286. /* (ya - yc) - (xb - xd) */
  287. Yb12C_out = (Yaminusc - Xbminusd);
  288. /* xa + xc -(xb + xd) */
  289. Xc12C_out = (Xaplusc - Xbplusd);
  290. /* (ya + yc) - (yb + yd) */
  291. Yc12C_out = (Yaplusc - Ybplusd);
  292. /* (xa - xc) - (yb - yd) */
  293. Xd12C_out = (Xaminusc - Ybminusd);
  294. /* (ya - yc) + (xb - xd) */
  295. Yd12C_out = (Xbminusd + Yaminusc);
  296. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  297. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  298. Xb12_out = Xb12C_out * co1;
  299. Yb12_out = Yb12C_out * co1;
  300. Xc12_out = Xc12C_out * co2;
  301. Yc12_out = Yc12C_out * co2;
  302. Xd12_out = Xd12C_out * co3;
  303. Yd12_out = Yd12C_out * co3;
  304. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  305. //Xb12_out -= Yb12C_out * si1;
  306. p0 = Yb12C_out * si1;
  307. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  308. //Yb12_out += Xb12C_out * si1;
  309. p1 = Xb12C_out * si1;
  310. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  311. //Xc12_out -= Yc12C_out * si2;
  312. p2 = Yc12C_out * si2;
  313. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  314. //Yc12_out += Xc12C_out * si2;
  315. p3 = Xc12C_out * si2;
  316. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  317. //Xd12_out -= Yd12C_out * si3;
  318. p4 = Yd12C_out * si3;
  319. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  320. //Yd12_out += Xd12C_out * si3;
  321. p5 = Xd12C_out * si3;
  322. Xb12_out += p0;
  323. Yb12_out -= p1;
  324. Xc12_out += p2;
  325. Yc12_out -= p3;
  326. Xd12_out += p4;
  327. Yd12_out -= p5;
  328. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  329. pSrc[2U * i1] = Xc12_out;
  330. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  331. pSrc[(2U * i1) + 1U] = Yc12_out;
  332. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  333. pSrc[2U * i2] = Xb12_out;
  334. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  335. pSrc[(2U * i2) + 1U] = Yb12_out;
  336. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  337. pSrc[2U * i3] = Xd12_out;
  338. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  339. pSrc[(2U * i3) + 1U] = Yd12_out;
  340. i0 += n1;
  341. } while (i0 < fftLen);
  342. j++;
  343. } while (j <= (n2 - 1U));
  344. twidCoefModifier <<= 2U;
  345. }
  346. j = fftLen >> 2;
  347. ptr1 = &pSrc[0];
  348. /* Calculations of last stage */
  349. do
  350. {
  351. xaIn = ptr1[0];
  352. yaIn = ptr1[1];
  353. xbIn = ptr1[2];
  354. ybIn = ptr1[3];
  355. xcIn = ptr1[4];
  356. ycIn = ptr1[5];
  357. xdIn = ptr1[6];
  358. ydIn = ptr1[7];
  359. /* xa + xc */
  360. Xaplusc = xaIn + xcIn;
  361. /* xa - xc */
  362. Xaminusc = xaIn - xcIn;
  363. /* ya + yc */
  364. Yaplusc = yaIn + ycIn;
  365. /* ya - yc */
  366. Yaminusc = yaIn - ycIn;
  367. /* xb + xd */
  368. Xbplusd = xbIn + xdIn;
  369. /* yb + yd */
  370. Ybplusd = ybIn + ydIn;
  371. /* (xb-xd) */
  372. Xbminusd = xbIn - xdIn;
  373. /* (yb-yd) */
  374. Ybminusd = ybIn - ydIn;
  375. /* xa' = xa + xb + xc + xd */
  376. a0 = (Xaplusc + Xbplusd);
  377. /* ya' = ya + yb + yc + yd */
  378. a1 = (Yaplusc + Ybplusd);
  379. /* xc' = (xa-xb+xc-xd) */
  380. a2 = (Xaplusc - Xbplusd);
  381. /* yc' = (ya-yb+yc-yd) */
  382. a3 = (Yaplusc - Ybplusd);
  383. /* xb' = (xa+yb-xc-yd) */
  384. a4 = (Xaminusc + Ybminusd);
  385. /* yb' = (ya-xb-yc+xd) */
  386. a5 = (Yaminusc - Xbminusd);
  387. /* xd' = (xa-yb-xc+yd)) */
  388. a6 = (Xaminusc - Ybminusd);
  389. /* yd' = (ya+xb-yc-xd) */
  390. a7 = (Xbminusd + Yaminusc);
  391. ptr1[0] = a0;
  392. ptr1[1] = a1;
  393. ptr1[2] = a2;
  394. ptr1[3] = a3;
  395. ptr1[4] = a4;
  396. ptr1[5] = a5;
  397. ptr1[6] = a6;
  398. ptr1[7] = a7;
  399. /* increment pointer by 8 */
  400. ptr1 += 8U;
  401. } while (--j);
  402. #else
  403. float32_t t1, t2, r1, r2, s1, s2;
  404. /* Run the below code for Cortex-M0 */
  405. /* Initializations for the fft calculation */
  406. n2 = fftLen;
  407. n1 = n2;
  408. for (k = fftLen; k > 1U; k >>= 2U)
  409. {
  410. /* Initializations for the fft calculation */
  411. n1 = n2;
  412. n2 >>= 2U;
  413. ia1 = 0U;
  414. /* FFT Calculation */
  415. j = 0;
  416. do
  417. {
  418. /* index calculation for the coefficients */
  419. ia2 = ia1 + ia1;
  420. ia3 = ia2 + ia1;
  421. co1 = pCoef[ia1 * 2U];
  422. si1 = pCoef[(ia1 * 2U) + 1U];
  423. co2 = pCoef[ia2 * 2U];
  424. si2 = pCoef[(ia2 * 2U) + 1U];
  425. co3 = pCoef[ia3 * 2U];
  426. si3 = pCoef[(ia3 * 2U) + 1U];
  427. /* Twiddle coefficients index modifier */
  428. ia1 = ia1 + twidCoefModifier;
  429. i0 = j;
  430. do
  431. {
  432. /* index calculation for the input as, */
  433. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  434. i1 = i0 + n2;
  435. i2 = i1 + n2;
  436. i3 = i2 + n2;
  437. /* xa + xc */
  438. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  439. /* xa - xc */
  440. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  441. /* ya + yc */
  442. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  443. /* ya - yc */
  444. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  445. /* xb + xd */
  446. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  447. /* xa' = xa + xb + xc + xd */
  448. pSrc[2U * i0] = r1 + t1;
  449. /* xa + xc -(xb + xd) */
  450. r1 = r1 - t1;
  451. /* yb + yd */
  452. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  453. /* ya' = ya + yb + yc + yd */
  454. pSrc[(2U * i0) + 1U] = s1 + t2;
  455. /* (ya + yc) - (yb + yd) */
  456. s1 = s1 - t2;
  457. /* (yb - yd) */
  458. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  459. /* (xb - xd) */
  460. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  461. /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
  462. pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
  463. /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
  464. pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
  465. /* (xa - xc) + (yb - yd) */
  466. r1 = r2 + t1;
  467. /* (xa - xc) - (yb - yd) */
  468. r2 = r2 - t1;
  469. /* (ya - yc) - (xb - xd) */
  470. s1 = s2 - t2;
  471. /* (ya - yc) + (xb - xd) */
  472. s2 = s2 + t2;
  473. /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
  474. pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
  475. /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
  476. pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
  477. /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
  478. pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
  479. /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
  480. pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
  481. i0 += n1;
  482. } while ( i0 < fftLen);
  483. j++;
  484. } while (j <= (n2 - 1U));
  485. twidCoefModifier <<= 2U;
  486. }
  487. #endif /* #if defined (ARM_MATH_DSP) */
  488. }
  489. /*
  490. * @brief Core function for the floating-point CIFFT butterfly process.
  491. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  492. * @param[in] fftLen length of the FFT.
  493. * @param[in] *pCoef points to twiddle coefficient buffer.
  494. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  495. * @param[in] onebyfftLen value of 1/fftLen.
  496. * @return none.
  497. */
  498. void arm_radix4_butterfly_inverse_f32(
  499. float32_t * pSrc,
  500. uint16_t fftLen,
  501. float32_t * pCoef,
  502. uint16_t twidCoefModifier,
  503. float32_t onebyfftLen)
  504. {
  505. float32_t co1, co2, co3, si1, si2, si3;
  506. uint32_t ia1, ia2, ia3;
  507. uint32_t i0, i1, i2, i3;
  508. uint32_t n1, n2, j, k;
  509. #if defined (ARM_MATH_DSP)
  510. float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
  511. float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
  512. Ybminusd;
  513. float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
  514. float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
  515. float32_t *ptr1;
  516. float32_t p0,p1,p2,p3,p4,p5,p6,p7;
  517. float32_t a0,a1,a2,a3,a4,a5,a6,a7;
  518. /* Initializations for the first stage */
  519. n2 = fftLen;
  520. n1 = n2;
  521. /* n2 = fftLen/4 */
  522. n2 >>= 2U;
  523. i0 = 0U;
  524. ia1 = 0U;
  525. j = n2;
  526. /* Calculation of first stage */
  527. do
  528. {
  529. /* index calculation for the input as, */
  530. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  531. i1 = i0 + n2;
  532. i2 = i1 + n2;
  533. i3 = i2 + n2;
  534. /* Butterfly implementation */
  535. xaIn = pSrc[(2U * i0)];
  536. yaIn = pSrc[(2U * i0) + 1U];
  537. xcIn = pSrc[(2U * i2)];
  538. ycIn = pSrc[(2U * i2) + 1U];
  539. xbIn = pSrc[(2U * i1)];
  540. ybIn = pSrc[(2U * i1) + 1U];
  541. xdIn = pSrc[(2U * i3)];
  542. ydIn = pSrc[(2U * i3) + 1U];
  543. /* xa + xc */
  544. Xaplusc = xaIn + xcIn;
  545. /* xb + xd */
  546. Xbplusd = xbIn + xdIn;
  547. /* ya + yc */
  548. Yaplusc = yaIn + ycIn;
  549. /* yb + yd */
  550. Ybplusd = ybIn + ydIn;
  551. /* index calculation for the coefficients */
  552. ia2 = ia1 + ia1;
  553. co2 = pCoef[ia2 * 2U];
  554. si2 = pCoef[(ia2 * 2U) + 1U];
  555. /* xa - xc */
  556. Xaminusc = xaIn - xcIn;
  557. /* xb - xd */
  558. Xbminusd = xbIn - xdIn;
  559. /* ya - yc */
  560. Yaminusc = yaIn - ycIn;
  561. /* yb - yd */
  562. Ybminusd = ybIn - ydIn;
  563. /* xa' = xa + xb + xc + xd */
  564. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  565. /* ya' = ya + yb + yc + yd */
  566. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  567. /* (xa - xc) - (yb - yd) */
  568. Xb12C_out = (Xaminusc - Ybminusd);
  569. /* (ya - yc) + (xb - xd) */
  570. Yb12C_out = (Yaminusc + Xbminusd);
  571. /* (xa + xc) - (xb + xd) */
  572. Xc12C_out = (Xaplusc - Xbplusd);
  573. /* (ya + yc) - (yb + yd) */
  574. Yc12C_out = (Yaplusc - Ybplusd);
  575. /* (xa - xc) + (yb - yd) */
  576. Xd12C_out = (Xaminusc + Ybminusd);
  577. /* (ya - yc) - (xb - xd) */
  578. Yd12C_out = (Yaminusc - Xbminusd);
  579. co1 = pCoef[ia1 * 2U];
  580. si1 = pCoef[(ia1 * 2U) + 1U];
  581. /* index calculation for the coefficients */
  582. ia3 = ia2 + ia1;
  583. co3 = pCoef[ia3 * 2U];
  584. si3 = pCoef[(ia3 * 2U) + 1U];
  585. Xb12_out = Xb12C_out * co1;
  586. Yb12_out = Yb12C_out * co1;
  587. Xc12_out = Xc12C_out * co2;
  588. Yc12_out = Yc12C_out * co2;
  589. Xd12_out = Xd12C_out * co3;
  590. Yd12_out = Yd12C_out * co3;
  591. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  592. //Xb12_out -= Yb12C_out * si1;
  593. p0 = Yb12C_out * si1;
  594. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  595. //Yb12_out += Xb12C_out * si1;
  596. p1 = Xb12C_out * si1;
  597. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  598. //Xc12_out -= Yc12C_out * si2;
  599. p2 = Yc12C_out * si2;
  600. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  601. //Yc12_out += Xc12C_out * si2;
  602. p3 = Xc12C_out * si2;
  603. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  604. //Xd12_out -= Yd12C_out * si3;
  605. p4 = Yd12C_out * si3;
  606. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  607. //Yd12_out += Xd12C_out * si3;
  608. p5 = Xd12C_out * si3;
  609. Xb12_out -= p0;
  610. Yb12_out += p1;
  611. Xc12_out -= p2;
  612. Yc12_out += p3;
  613. Xd12_out -= p4;
  614. Yd12_out += p5;
  615. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  616. pSrc[2U * i1] = Xc12_out;
  617. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  618. pSrc[(2U * i1) + 1U] = Yc12_out;
  619. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  620. pSrc[2U * i2] = Xb12_out;
  621. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  622. pSrc[(2U * i2) + 1U] = Yb12_out;
  623. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  624. pSrc[2U * i3] = Xd12_out;
  625. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  626. pSrc[(2U * i3) + 1U] = Yd12_out;
  627. /* Twiddle coefficients index modifier */
  628. ia1 = ia1 + twidCoefModifier;
  629. /* Updating input index */
  630. i0 = i0 + 1U;
  631. } while (--j);
  632. twidCoefModifier <<= 2U;
  633. /* Calculation of second stage to excluding last stage */
  634. for (k = fftLen >> 2U; k > 4U; k >>= 2U)
  635. {
  636. /* Initializations for the first stage */
  637. n1 = n2;
  638. n2 >>= 2U;
  639. ia1 = 0U;
  640. /* Calculation of first stage */
  641. j = 0;
  642. do
  643. {
  644. /* index calculation for the coefficients */
  645. ia2 = ia1 + ia1;
  646. ia3 = ia2 + ia1;
  647. co1 = pCoef[ia1 * 2U];
  648. si1 = pCoef[(ia1 * 2U) + 1U];
  649. co2 = pCoef[ia2 * 2U];
  650. si2 = pCoef[(ia2 * 2U) + 1U];
  651. co3 = pCoef[ia3 * 2U];
  652. si3 = pCoef[(ia3 * 2U) + 1U];
  653. /* Twiddle coefficients index modifier */
  654. ia1 = ia1 + twidCoefModifier;
  655. i0 = j;
  656. do
  657. {
  658. /* index calculation for the input as, */
  659. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  660. i1 = i0 + n2;
  661. i2 = i1 + n2;
  662. i3 = i2 + n2;
  663. xaIn = pSrc[(2U * i0)];
  664. yaIn = pSrc[(2U * i0) + 1U];
  665. xbIn = pSrc[(2U * i1)];
  666. ybIn = pSrc[(2U * i1) + 1U];
  667. xcIn = pSrc[(2U * i2)];
  668. ycIn = pSrc[(2U * i2) + 1U];
  669. xdIn = pSrc[(2U * i3)];
  670. ydIn = pSrc[(2U * i3) + 1U];
  671. /* xa - xc */
  672. Xaminusc = xaIn - xcIn;
  673. /* (xb - xd) */
  674. Xbminusd = xbIn - xdIn;
  675. /* ya - yc */
  676. Yaminusc = yaIn - ycIn;
  677. /* (yb - yd) */
  678. Ybminusd = ybIn - ydIn;
  679. /* xa + xc */
  680. Xaplusc = xaIn + xcIn;
  681. /* xb + xd */
  682. Xbplusd = xbIn + xdIn;
  683. /* ya + yc */
  684. Yaplusc = yaIn + ycIn;
  685. /* yb + yd */
  686. Ybplusd = ybIn + ydIn;
  687. /* (xa - xc) - (yb - yd) */
  688. Xb12C_out = (Xaminusc - Ybminusd);
  689. /* (ya - yc) + (xb - xd) */
  690. Yb12C_out = (Yaminusc + Xbminusd);
  691. /* xa + xc -(xb + xd) */
  692. Xc12C_out = (Xaplusc - Xbplusd);
  693. /* (ya + yc) - (yb + yd) */
  694. Yc12C_out = (Yaplusc - Ybplusd);
  695. /* (xa - xc) + (yb - yd) */
  696. Xd12C_out = (Xaminusc + Ybminusd);
  697. /* (ya - yc) - (xb - xd) */
  698. Yd12C_out = (Yaminusc - Xbminusd);
  699. pSrc[(2U * i0)] = Xaplusc + Xbplusd;
  700. pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
  701. Xb12_out = Xb12C_out * co1;
  702. Yb12_out = Yb12C_out * co1;
  703. Xc12_out = Xc12C_out * co2;
  704. Yc12_out = Yc12C_out * co2;
  705. Xd12_out = Xd12C_out * co3;
  706. Yd12_out = Yd12C_out * co3;
  707. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  708. //Xb12_out -= Yb12C_out * si1;
  709. p0 = Yb12C_out * si1;
  710. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  711. //Yb12_out += Xb12C_out * si1;
  712. p1 = Xb12C_out * si1;
  713. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  714. //Xc12_out -= Yc12C_out * si2;
  715. p2 = Yc12C_out * si2;
  716. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  717. //Yc12_out += Xc12C_out * si2;
  718. p3 = Xc12C_out * si2;
  719. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  720. //Xd12_out -= Yd12C_out * si3;
  721. p4 = Yd12C_out * si3;
  722. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  723. //Yd12_out += Xd12C_out * si3;
  724. p5 = Xd12C_out * si3;
  725. Xb12_out -= p0;
  726. Yb12_out += p1;
  727. Xc12_out -= p2;
  728. Yc12_out += p3;
  729. Xd12_out -= p4;
  730. Yd12_out += p5;
  731. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  732. pSrc[2U * i1] = Xc12_out;
  733. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  734. pSrc[(2U * i1) + 1U] = Yc12_out;
  735. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  736. pSrc[2U * i2] = Xb12_out;
  737. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  738. pSrc[(2U * i2) + 1U] = Yb12_out;
  739. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  740. pSrc[2U * i3] = Xd12_out;
  741. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  742. pSrc[(2U * i3) + 1U] = Yd12_out;
  743. i0 += n1;
  744. } while (i0 < fftLen);
  745. j++;
  746. } while (j <= (n2 - 1U));
  747. twidCoefModifier <<= 2U;
  748. }
  749. /* Initializations of last stage */
  750. j = fftLen >> 2;
  751. ptr1 = &pSrc[0];
  752. /* Calculations of last stage */
  753. do
  754. {
  755. xaIn = ptr1[0];
  756. yaIn = ptr1[1];
  757. xbIn = ptr1[2];
  758. ybIn = ptr1[3];
  759. xcIn = ptr1[4];
  760. ycIn = ptr1[5];
  761. xdIn = ptr1[6];
  762. ydIn = ptr1[7];
  763. /* Butterfly implementation */
  764. /* xa + xc */
  765. Xaplusc = xaIn + xcIn;
  766. /* xa - xc */
  767. Xaminusc = xaIn - xcIn;
  768. /* ya + yc */
  769. Yaplusc = yaIn + ycIn;
  770. /* ya - yc */
  771. Yaminusc = yaIn - ycIn;
  772. /* xb + xd */
  773. Xbplusd = xbIn + xdIn;
  774. /* yb + yd */
  775. Ybplusd = ybIn + ydIn;
  776. /* (xb-xd) */
  777. Xbminusd = xbIn - xdIn;
  778. /* (yb-yd) */
  779. Ybminusd = ybIn - ydIn;
  780. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  781. a0 = (Xaplusc + Xbplusd);
  782. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  783. a1 = (Yaplusc + Ybplusd);
  784. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  785. a2 = (Xaplusc - Xbplusd);
  786. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  787. a3 = (Yaplusc - Ybplusd);
  788. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  789. a4 = (Xaminusc - Ybminusd);
  790. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  791. a5 = (Yaminusc + Xbminusd);
  792. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  793. a6 = (Xaminusc + Ybminusd);
  794. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  795. a7 = (Yaminusc - Xbminusd);
  796. p0 = a0 * onebyfftLen;
  797. p1 = a1 * onebyfftLen;
  798. p2 = a2 * onebyfftLen;
  799. p3 = a3 * onebyfftLen;
  800. p4 = a4 * onebyfftLen;
  801. p5 = a5 * onebyfftLen;
  802. p6 = a6 * onebyfftLen;
  803. p7 = a7 * onebyfftLen;
  804. /* xa' = (xa+xb+xc+xd) * onebyfftLen */
  805. ptr1[0] = p0;
  806. /* ya' = (ya+yb+yc+yd) * onebyfftLen */
  807. ptr1[1] = p1;
  808. /* xc' = (xa-xb+xc-xd) * onebyfftLen */
  809. ptr1[2] = p2;
  810. /* yc' = (ya-yb+yc-yd) * onebyfftLen */
  811. ptr1[3] = p3;
  812. /* xb' = (xa-yb-xc+yd) * onebyfftLen */
  813. ptr1[4] = p4;
  814. /* yb' = (ya+xb-yc-xd) * onebyfftLen */
  815. ptr1[5] = p5;
  816. /* xd' = (xa-yb-xc+yd) * onebyfftLen */
  817. ptr1[6] = p6;
  818. /* yd' = (ya-xb-yc+xd) * onebyfftLen */
  819. ptr1[7] = p7;
  820. /* increment source pointer by 8 for next calculations */
  821. ptr1 = ptr1 + 8U;
  822. } while (--j);
  823. #else
  824. float32_t t1, t2, r1, r2, s1, s2;
  825. /* Run the below code for Cortex-M0 */
  826. /* Initializations for the first stage */
  827. n2 = fftLen;
  828. n1 = n2;
  829. /* Calculation of first stage */
  830. for (k = fftLen; k > 4U; k >>= 2U)
  831. {
  832. /* Initializations for the first stage */
  833. n1 = n2;
  834. n2 >>= 2U;
  835. ia1 = 0U;
  836. /* Calculation of first stage */
  837. j = 0;
  838. do
  839. {
  840. /* index calculation for the coefficients */
  841. ia2 = ia1 + ia1;
  842. ia3 = ia2 + ia1;
  843. co1 = pCoef[ia1 * 2U];
  844. si1 = pCoef[(ia1 * 2U) + 1U];
  845. co2 = pCoef[ia2 * 2U];
  846. si2 = pCoef[(ia2 * 2U) + 1U];
  847. co3 = pCoef[ia3 * 2U];
  848. si3 = pCoef[(ia3 * 2U) + 1U];
  849. /* Twiddle coefficients index modifier */
  850. ia1 = ia1 + twidCoefModifier;
  851. i0 = j;
  852. do
  853. {
  854. /* index calculation for the input as, */
  855. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  856. i1 = i0 + n2;
  857. i2 = i1 + n2;
  858. i3 = i2 + n2;
  859. /* xa + xc */
  860. r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
  861. /* xa - xc */
  862. r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
  863. /* ya + yc */
  864. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  865. /* ya - yc */
  866. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  867. /* xb + xd */
  868. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  869. /* xa' = xa + xb + xc + xd */
  870. pSrc[2U * i0] = r1 + t1;
  871. /* xa + xc -(xb + xd) */
  872. r1 = r1 - t1;
  873. /* yb + yd */
  874. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  875. /* ya' = ya + yb + yc + yd */
  876. pSrc[(2U * i0) + 1U] = s1 + t2;
  877. /* (ya + yc) - (yb + yd) */
  878. s1 = s1 - t2;
  879. /* (yb - yd) */
  880. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  881. /* (xb - xd) */
  882. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  883. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  884. pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
  885. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  886. pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
  887. /* (xa - xc) - (yb - yd) */
  888. r1 = r2 - t1;
  889. /* (xa - xc) + (yb - yd) */
  890. r2 = r2 + t1;
  891. /* (ya - yc) + (xb - xd) */
  892. s1 = s2 + t2;
  893. /* (ya - yc) - (xb - xd) */
  894. s2 = s2 - t2;
  895. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  896. pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
  897. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  898. pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
  899. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  900. pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
  901. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  902. pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
  903. i0 += n1;
  904. } while ( i0 < fftLen);
  905. j++;
  906. } while (j <= (n2 - 1U));
  907. twidCoefModifier <<= 2U;
  908. }
  909. /* Initializations of last stage */
  910. n1 = n2;
  911. n2 >>= 2U;
  912. /* Calculations of last stage */
  913. for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
  914. {
  915. /* index calculation for the input as, */
  916. /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
  917. i1 = i0 + n2;
  918. i2 = i1 + n2;
  919. i3 = i2 + n2;
  920. /* Butterfly implementation */
  921. /* xa + xc */
  922. r1 = pSrc[2U * i0] + pSrc[2U * i2];
  923. /* xa - xc */
  924. r2 = pSrc[2U * i0] - pSrc[2U * i2];
  925. /* ya + yc */
  926. s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
  927. /* ya - yc */
  928. s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
  929. /* xc + xd */
  930. t1 = pSrc[2U * i1] + pSrc[2U * i3];
  931. /* xa' = xa + xb + xc + xd */
  932. pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
  933. /* (xa + xb) - (xc + xd) */
  934. r1 = r1 - t1;
  935. /* yb + yd */
  936. t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
  937. /* ya' = ya + yb + yc + yd */
  938. pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
  939. /* (ya + yc) - (yb + yd) */
  940. s1 = s1 - t2;
  941. /* (yb-yd) */
  942. t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
  943. /* (xb-xd) */
  944. t2 = pSrc[2U * i1] - pSrc[2U * i3];
  945. /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
  946. pSrc[2U * i1] = r1 * onebyfftLen;
  947. /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
  948. pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
  949. /* (xa - xc) - (yb-yd) */
  950. r1 = r2 - t1;
  951. /* (xa - xc) + (yb-yd) */
  952. r2 = r2 + t1;
  953. /* (ya - yc) + (xb-xd) */
  954. s1 = s2 + t2;
  955. /* (ya - yc) - (xb-xd) */
  956. s2 = s2 - t2;
  957. /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
  958. pSrc[2U * i2] = r1 * onebyfftLen;
  959. /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
  960. pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
  961. /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
  962. pSrc[2U * i3] = r2 * onebyfftLen;
  963. /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
  964. pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
  965. }
  966. #endif /* #if defined (ARM_MATH_DSP) */
  967. }