// 20160411 firlpf.cu - rod8902/grrc2016 GitHub Wiki

/**
 * calcFIR - short low-pass smoothing of the first tile of an audio buffer.
 *
 * Every block stages up to 2048 + 32 input frames in shared memory. Each
 * thread then owns a contiguous chunk of (2048 / blockDim.x) output frames
 * and, for every frame i of its chunk, applies four successive updates
 *
 *     out[i] = out[i] * amountoflast + in[i + j] * amountofcurrent,  j = 0..3
 *
 * with coefficients derived from a 500 Hz cutoff at a 44100 Hz sample rate.
 *
 * The running value of the first frame of each thread's chunk starts at 0;
 * every other frame starts from whatever g_outdata[i] already holds
 * (presumably the caller zeroes the output buffer first - TODO confirm).
 *
 * @param g_indata   input samples (global memory); only indices < nframes
 *                   are read, taps past the end are treated as 0.
 * @param g_outdata  output samples (global memory); only indices < nframes
 *                   are written.
 * @param nframes    number of valid frames in both buffers.
 *
 * Launch layout: 1-D block. The kernel statically allocates 2080 floats of
 * shared memory. If blockDim.x does not divide 2048, the trailing
 * (2048 % blockDim.x) frames of the tile are left untouched.
 *
 * NOTE(review): blockIdx is not used, so every block of the grid processes
 * the same first tile. This looks intended for a single-block launch -
 * confirm against the host code before launching a larger grid.
 *
 * An earlier experiment (removed from this kernel) used a one-pole RC
 * low-pass instead: alpha = dt / (RC + dt) with RC = 1 / (cutoff * 2 * pi)
 * and dt = 1 / samplerate, updating out += alpha * (in - out).
 */
__global__ void calcFIR(const float * g_indata, float * g_outdata, const int nframes)
{
    const int kTileFrames = 2048;   // frames handled per block
    const int kHaloFrames = 32;     // look-ahead room staged after the tile
    const int kTaps       = 4;      // input frames folded into each output

    __shared__ float sharedM[kTileFrames + kHaloFrames];

    const int tid      = (int)threadIdx.x;
    const int nthreads = (int)blockDim.x;

    // Contiguous chunk of output frames owned by this thread.
    const int loadsize = kTileFrames / nthreads;
    const int begin    = loadsize * tid;
    const int end      = begin + loadsize;

    // Filter coefficients (identical for every thread).
    const int   samplerate          = 44100;
    const int   cutoff              = 500;
    const float twopioversamplerate = (2 * M_PI) / samplerate;
    const float comp                = 2.0f - cosf(twopioversamplerate * cutoff);
    const float amountoflast        = comp - sqrtf(comp * comp - 1.0f);
    const float amountofcurrent     = 1.0f - amountoflast;

    // Stage the tile plus halo cooperatively: consecutive threads read
    // consecutive addresses, and each shared element is written exactly once.
    // Frames at or beyond nframes are zero-filled instead of being read from
    // past the end of g_indata.
    const int staged = loadsize * nthreads + kHaloFrames;   // never exceeds 2080
    for (int i = tid; i < staged; i += nthreads) {
        sharedM[i] = (i < nframes) ? g_indata[i] : 0.0f;
    }

    // All of sharedM must be populated before any thread reads its taps.
    __syncthreads();

    // Never write beyond the valid part of the output buffer.
    const int last = (end < nframes) ? end : nframes;

    for (int i = begin; i < last; ++i) {
        // Only the first frame of the chunk is reset; the others continue
        // from the value already stored in the output buffer.
        float acc = (i == begin) ? 0.0f : g_outdata[i];

        #pragma unroll
        for (int j = 0; j < kTaps; ++j) {
            acc = acc * amountoflast + sharedM[i + j] * amountofcurrent;
        }

        // Single global store per frame instead of one per tap.
        g_outdata[i] = acc;
    }
}