FFT - rosco-pc/propeller-wiki GitHub Wiki

As everybody knows, FFT stands for _F_ast _F_ourier _T_ransform. It is commonly available in some sort or another for most microprocessors, and now the propeller is no exception!. The implementation shown below uses a sine/cosine table, an input buffer (for real and imaginary part) and outputs to the same input buffers. Calculations are done using signed integer numbers. Multiplication is done using an unrolled Chip's algorithm with some improvements to make it signed-aware (using the _sgn variable). Bit reversal is performed using the convenient rev instruction, saving 75% of the time that a normal for/while algorithm will take. The butterfly loops are just standard without surprises, the variables are scaled for better use of the adds and subs instructions (signed math).

Spectrum of a run, using a base frequency by 5, like the test routine for int_fft.c describes

Input

The input should be an array of signed 16 bit values in the range -32768 to 32768. Scaling is applied automatically, so it would be a good idea to use the whole range. If your data is just 8 bit from an ADC, well scaling can be done for example in the decimation (bit-reversal) routine without incurring in too much overhead, Only real data is used in this bit-reversal. But imaginary data could be also shuffled adding few instructions.

Output

The output is a 2 dimensional array (like the input), with the real part occupying the first half and the imaginary the second one.

Samples

This implementation works for a 1024 sample FFT. It takes around 4720000 cycles, 20480 multiplications, 5120 passes of the inner butterfly. Expected. That is enough to get 16 fps (@80 MHz). That is without the absolute value calculation (1024 multiplications, and 512 square roots, 532000 cycles) and plot drawing routines.

Code

The code shown below can be optimized a bit more, but it works !


.section cog cog0 ' needed to generate

' Converted to Propeller Assembler by Pacito.Sys, based on int_fft.c by Tom Roberts
' with portability by Malcolm Slaney.
' Distributed under the terms of the GNU GPL v2.0.
'
' Integer FFT
' 16 bit signed values are used


NN=1024
BITS_NN=10
BITS_NNM1=9
BITS_DIFF=3

init                    mov     fft_fr,cnt_rsample_ptr  ' real part buffer, 2048 bytes
                        mov     fft_fi,cnt_isample_ptr  ' imag part buffer, 2048 bytes

                        mov     fft_n,#1
                        shl     fft_n,#BITS_NN          '1024 point fft

                        call    #decimate
                        call    #lets_rock
                        call    #calc_abs
                        call    #plot

init_end                jmp     #init_end               ' end

' bit-reversal, uses the nice rev instruction
decimate                mov     fft_ii,#1
                        mov     fft_ll,fft_n
ldecimate               mov     fft_jj,fft_ii
                        rev     fft_jj,#32-BITS_NN     ' BITS_NN will be reversed
                        cmp     fft_ii,fft_jj   wc
              if_nc     jmp     #ldecimate_5
                        mov     fft_fr_ii,fft_ii
                        mov     fft_fr_jj,fft_jj
                        shl     fft_fr_ii,#1
                        add     fft_fr_ii,fft_fr
                        rdword  fft_tr,fft_fr_ii
                        shl     fft_fr_jj,#1
                        add     fft_fr_jj,fft_fr
                        rdword  fft_result,fft_fr_jj
                        wrword  fft_tr,fft_fr_jj
                        wrword  fft_result,fft_fr_ii
ldecimate_5             add     fft_ii,#1
                        cmp     fft_ii,fft_ll   wc, wz
              if_c_or_z jmp     #ldecimate
decimate_ret            ret

' Calcs the 1024 point-FFT using 16 bit signed integers, some calculations
' are don with 32 bits

lets_rock               mov     fft_ll,#1
                        mov     fft_k,#BITS_NNM1

lets_rock_while         cmp     fft_ll,fft_n      wc
              if_nc     jmp     #lets_rock_while_e

                        mov     fft_is,fft_ll
                        shl     fft_is,#1
                        mov     fft_m,#0
lets_rock_for_1         cmp     fft_m,fft_ll      wc
              if_nc     jmp     #lets_rock_for_1_e

                        mov     fft_jj,fft_m
                        shl     fft_jj,fft_k

                        call    #get_sincos
                        mov     fft_ii,fft_m
lets_rock_for_2         cmp     fft_ii,fft_n      wc
              if_nc     jmp     #lets_rock_for_2_e

                        mov     fft_jj,fft_ii
                        add     fft_jj,fft_ll

                        mov     fft_fi_jj,fft_jj
                        shl     fft_fi_jj,#1      ' word access
                        mov     fft_fr_jj,fft_fi_jj
                        add     fft_fr_jj,fft_fr
                        add     fft_fi_jj,fft_fi

                        rdword  fft_result,fft_fr_jj
                        call    #lets_mul_wr
                        mov     fft_tr,fft_result
                        rdword  fft_result,fft_fi_jj
                        call    #lets_mul_wi
                        subs    fft_tr,fft_result     ' 32 bit signed value

                        rdword  fft_result,fft_fi_jj
                        call    #lets_mul_wr
                        mov     fft_ti,fft_result
                        rdword  fft_result,fft_fr_jj
                        call    #lets_mul_wi
                        adds    fft_ti,fft_result     ' 32 bit signed value

                        mov     fft_fi_ii,fft_ii
                        shl     fft_fi_ii,#1          ' word access
                        mov     fft_fr_ii,fft_fi_ii
                        add     fft_fr_ii,fft_fr
                        add     fft_fi_ii,fft_fi

                        rdword  fft_qr,fft_fr_ii      ' qr = fr[i]
                        shl     fft_qr,#16
                        sar     fft_qr,#1             ' scales to 32 bit signed value
                        mov     fft_result,fft_tr
                        rdword  fft_qi,fft_fi_ii      ' qi = fi[i]
                        shl     fft_qi,#16
                        sar     fft_qi,#1             ' scales to 32 bit signed value
                        adds    fft_result,fft_qr     ' res = tr + qr
                        subs    fft_qr,fft_tr         ' qr = qr - tr
                        shr     fft_result,#16        ' scales down
                        wrword  fft_result,fft_fr_ii  ' fr[i] = res = tr + qr

                        mov     fft_result,fft_ti
                        adds    fft_result,fft_qi     ' res = ti + qi
                        shr     fft_qr,#16            ' scales down
                        wrword  fft_qr,fft_fr_jj      ' fr[j] = qr = qr - tr
                        subs    fft_qi,fft_ti         ' qi = qi - ti
                        shr     fft_result,#16        ' scales down
                        wrword  fft_result,fft_fi_ii  ' fi[i] = ti + qi
                        shr     fft_qi,#16            ' scales down
                        add     fft_ii,fft_is
                        wrword  fft_qi,fft_fi_jj      ' fi[j] = qi = qi - ti
                        jmp     #lets_rock_for_2
lets_rock_for_2_e
                        add     fft_m,#1
                        jmp     #lets_rock_for_1
lets_rock_for_1_e       sub     fft_k,#1
                        mov     fft_ll,fft_is
                        jmp     #lets_rock_while
lets_rock_while_e
lets_rock_ret           ret


lets_mul_wi             mov     fft_sgn,fft_result
                        and     fft_sgn,cnt_sgn     wz
                        shl     fft_result,#16
                        negnz   fft_result,fft_result
                        shr     fft_result,#15

                        shr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wi   wc
                        rcr     fft_result,#1       wc
                        xor     fft_sgn,fft_sgnwi   wz
                        negnz   fft_result,fft_result
lets_mul_wi_ret         ret

lets_mul_wr             mov     fft_sgn,fft_result
                        and     fft_sgn,cnt_sgn     wz
                        shl     fft_result,#16
                        negnz   fft_result,fft_result
                        shr     fft_result,#15
                        shr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
              if_c      add     fft_result,fft_wr   wc
                        rcr     fft_result,#1       wc
                        xor     fft_sgn,fft_sgnwr   wz
                        negnz   fft_result,fft_result
lets_mul_wr_ret         ret

' Uses the ROM table to get the sine and cosine of jj

get_sincos              mov     fft_wr,fft_jj
                        shl     fft_wr,#BITS_DIFF
                        mov     fft_wi,fft_wr
                        add     fft_wr,cnt_sin_90
                        test    fft_wi,cnt_sin_90     wc
                        test    fft_wi,cnt_sin_180    wz
                        negc    fft_wi,fft_wi
                        or      fft_wi,cnt_sin_table
                        shl     fft_wi,#1
                        rdword  fft_wi,fft_wi
              if_z      mov     fft_sgnwi,cnt_sgn        ' they are inverted
              if_nz     mov     fft_sgnwi,#0
                        test    fft_wr,cnt_sin_90     wc
                        test    fft_wr,cnt_sin_180    wz
                        negc    fft_wr,fft_wr
                        or      fft_wr,cnt_sin_table
                        shl     fft_wr,#1
                        rdword  fft_wr,fft_wr
              if_nz     mov     fft_sgnwr,cnt_sgn        ' they are not inverted
              if_z      mov     fft_sgnwr,#0

                        shl     fft_wr,#14
                        shl     fft_wi,#14
get_sincos_ret          ret

lets_mul_qr             mov     fft_result,fft_qr
                        shl     fft_result,#16
                        abs     fft_result,fft_result
                        mov     fft_qr,fft_result
                        shr     fft_result,#16

                        shr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1        wc
              if_c      add     fft_result,fft_qr    wc
                        rcr     fft_result,#1
lets_mul_qr_ret         ret

lets_sqrt_qi            mov     fft_result,#0
                        mov     fft_m,#0
                        mov     fft_jj,#16
lets_sqrt_qi_l          shl     fft_qi,#1   wc
                        rcl     fft_m,#1
                        shl     fft_qi,#1   wc
                        rcl     fft_m,#1
                        shl     fft_result,#2
                        or      fft_result,#1
                        cmpsub  fft_m,fft_result   wc, wr
                        shr     fft_result,#2
                        rcl     fft_result,#1
                        djnz    fft_jj,#lets_sqrt_qi_l
lets_sqrt_qi_ret        ret

calc_abs                mov      fft_ii,#511
                        mov      fft_fr_ii,fft_fr
                        mov      fft_fi_ii,fft_fi
calc_abs_5              rdword   fft_qr,fft_fr_ii
                        call     #lets_mul_qr
                        mov      fft_qi,fft_result
                        rdword   fft_qr,fft_fi_ii
                        add      fft_fi_ii,#2          ' next word
                        call     #lets_mul_qr
                        add      fft_qi,fft_result
                        call     #lets_sqrt_qi
                        wrword   fft_result,fft_fr_ii
                        add      fft_fr_ii,#2          ' next word
                        djnz     fft_ii,#calc_abs_5
calc_abs_ret            ret

' This routine will draw the spectrum in a 1bpp 320x240 bitmap

plot                    mov      fft_ii,#40
                        mov      fft_jj,#0

                        mov      fft_fr_ii,fft_fr
plot_8p                 mov      fft_k,#$80
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        shr      fft_k,#1
                        rdword   fft_qr,fft_fr_ii
                        add      fft_fr_ii,#2
                        call     #putpix
                        add      fft_jj,#1
                        djnz     fft_ii,#plot_8p

plot_ret                ret

putpix                  mov      fft_qi,#239
                        max      fft_qi,fft_qr
                        mov      fft_qr,#239
                        sub      fft_qr,fft_qi
                        shl      fft_qr,#3
                        mov      fft_qi,fft_qr
                        shl      fft_qr,#2
                        add      fft_qr,fft_qi
                        add      fft_qr,cnt_bitmap_ptr
                        add      fft_qr,fft_jj
                        rdbyte   fft_ll,fft_qr
                        or       fft_ll,fft_k
                        wrbyte   fft_ll,fft_qr
putpix_ret              ret

' constants
cnt_sgn                 long    $8000
cnt_sin_90              long    $0800
cnt_sin_180             long    $1000
cnt_sin_table           long    $7000
cnt_rsample_ptr         long    $800
cnt_isample_ptr         long    $1000
cnt_bitmap_ptr          long    $4000
cnt_add_ptr             long    512

' Variables

fft_ii                  long    0
fft_is                  long    0
fft_jj                  long    0
fft_k                   long    0
fft_ll                  long    0
fft_m                   long    0
fft_n                   long    0

fft_qr                  long    0
fft_qi                  long    0
fft_fr                  long    0
fft_fi                  long    0
fft_tr                  long    0
fft_ti                  long    0
fft_wr                  long    0
fft_wi                  long    0
fft_fi_ii               long    0
fft_fr_ii               long    0
fft_fi_jj               long    0
fft_fr_jj               long    0
fft_result              long    0
fft_sgn                 long    0
fft_sgnwr               long    0  ' sign of wr
fft_sgnwi               long    0  ' sign of wi

This can be adapted with minor modifications to 256 or 512 points or extended to 2048 or 4096 points, in those cases some constants need adjustment

NN : Number of samples
BITS_NN : Log2(Number of samples)
BITS_NNM1 : BITS_NN - 1
BITS_DIFF : Difference between the amount of samples (divided by 2) and the samples in the sine table.

Some modifications to get_abs and plot may be then necessary. Note: This was tested with pPropellerSim, as of today not yet available with all bugs fixed (rev, max, etc).

Enjoy !