kitw - Developer IT

Search Results

Search found 2 results on 1 page for 'kitw'.

Page 1/1 | 1

How does cudaMalloc work?

- by kitw

I am trying to modify the imageDenoising class in the CUDA SDK. I need to repeat the filter many times in order to measure the running time, but my code doesn't work properly. //start __global__ void F1D(TColor *image,int imageW,int imageH, TColor *buffer) { const int ix = blockDim.x * blockIdx.x + threadIdx.x; const int iy = blockDim.y * blockIdx.y + threadIdx.y; if(iy != 0 && iy < imageH-1 && ix < imageW) { float4 fresult = get_color(image[imageW * iy + ix]); float4 fresult4 = get_color(image[imageW * (iy+1) + ix]); float4 fresult5 = get_color(image[imageW * (iy-1) + ix]); float4 fresult7; fresult7.x = fresult.x*0.5+fresult4.x*.25+fresult5.x*.25; fresult7.y = fresult.y*0.5+fresult4.y*.25+fresult5.y*.25; fresult7.z = fresult.z*0.5+fresult4.z*.25+fresult5.z*.25; buffer[imageW * iy + ix] = make_color(fresult7.x,fresult7.y,fresult7.z,0); } image[imageW * iy + ix] = buffer[imageW * iy + ix]; //should be use cudaMemcpy, But it fails } //extern extern "C" void cuda_F1D(TColor *dst, int imageW, int imageH) { dim3 threads(BLOCKDIM_X, BLOCKDIM_Y); dim3 grid(iDivUp(imageW, BLOCKDIM_X), iDivUp(imageH, BLOCKDIM_Y)); Copy<<<grid, threads>>>(dst, imageW, imageH); size_t size = imageW*imageH*sizeof(TColor); TColor *host =(TColor*) malloc(size); TColor *dst2; //TColor *dst3; //TColor *d = new TColor(imageW*imageH*sizeof(TColor)); dim3 threads2(imageW,1); dim3 grid2(iDivUp(imageW, imageW), iDivUp(imageH, 1)); *for(int i = 0;i<1;i++) { cudaMalloc( (void **)&dst2, size); cudaMemcpy(dst2, dst, imageW*imageH*sizeof(TColor),cudaMemcpyHostToDevice); //cudaMalloc( (void **)&dst3, imageW*imageH*sizeof(TColor)); //cudaMemcpy(dst3, dst, imageW*imageH*sizeof(TColor),cudaMemcpyHostToDevice); F1D<<<grid2, threads2>>>(dst, imageW, imageH,dst2); //cudaMemcpy(dst, dst3, imageW*imageH*sizeof(TColor),cudaMemcpyDeviceToHost); cudaFree(dst2); }* } This code works, but it can't synchronise the image array, which leads to many synchronisation problems.

Read the article

how to make a CUDA Histogram kernel?

- by kitw

Hi all, I am writing a CUDA kernel that computes a histogram of a picture, but I have no idea how to return an array from the kernel, and the array changes while other threads are reading it. Is there any possible solution? __global__ void Hist( TColor *dst, //input image int imageW, int imageH, int*data ){ const int ix = blockDim.x * blockIdx.x + threadIdx.x; const int iy = blockDim.y * blockIdx.y + threadIdx.y; if(ix < imageW && iy < imageH) { int pixel = get_red(dst[imageW * (iy) + (ix)]); //this assign specific RED value of image to pixel data[pixel] ++; // ?? problem statement ... } } @param d_dst: the input image; TColor is equal to float4. @param data: the array for the histogram, size [255] extern "C" void cuda_Hist(TColor *d_dst, int imageW, int imageH,int* data) { dim3 threads(BLOCKDIM_X, BLOCKDIM_Y); dim3 grid(iDivUp(imageW, BLOCKDIM_X), iDivUp(imageH, BLOCKDIM_Y)); Hist<<<grid, threads>>>(d_dst, imageW, imageH, data); }