int ua - Page 35 - Developer IT

Transferring data from 2d Dynamic array in C to CUDA and back

- by Soumya

I have a dynamically declared 2D array in my C program, the contents of which I want to transfer to a CUDA kernel for further processing. Once processed, I want to populate the dynamically declared 2D array in my C code with the CUDA processed data. I am able to do this with static 2D C arrays but not with dynamically declared C arrays. Any inputs would be welcome! I mean the dynamic array of dynamic arrays. The test code that I have written is as below. #include "cuda_runtime.h" #include "device_launch_parameters.h" #include <stdio.h> #include <conio.h> #include <math.h> #include <stdlib.h> const int nItt = 10; const int nP = 5; __device__ int d_nItt = 10; __device__ int d_nP = 5; __global__ void arr_chk(float *d_x_k, float *d_w_k, int row_num) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; int index1 = (row_num * d_nP) + index; if ( (index1 >= row_num * d_nP) && (index1 < ((row_num +1)*d_nP))) //Modifying only one row data pertaining to one particular iteration { d_x_k[index1] = row_num * d_nP; d_w_k[index1] = index; } } float **mat_create2(int r, int c) { float **dynamicArray; dynamicArray = (float **) malloc (sizeof (float)*r); for(int i=0; i<r; i++) { dynamicArray[i] = (float *) malloc (sizeof (float)*c); for(int j= 0; j<c;j++) { dynamicArray[i][j] = 0; } } return dynamicArray; } /* Freeing memory - here only number of rows are passed*/ void cleanup2d(float **mat_arr, int x) { int i; for(i=0; i<x; i++) { free(mat_arr[i]); } free(mat_arr); } int main() { //float w_k[nItt][nP]; //Static array declaration - works! //float x_k[nItt][nP]; // if I uncomment this dynamic declaration and comment the static one, it does not work..... float **w_k = mat_create2(nItt,nP); float **x_k = mat_create2(nItt,nP); float *d_w_k, *d_x_k; // Device variables for w_k and x_k int nblocks, blocksize, nthreads; for(int i=0;i<nItt;i++) { for(int j=0;j<nP;j++) { x_k[i][j] = (nP*i); w_k[i][j] = j; } } for(int i=0;i<nItt;i++) { for(int j=0;j<nP;j++) { printf("x_k[%d][%d] = %f\t",i,j,x_k[i][j]); printf("w_k[%d][%d] = %f\n",i,j,w_k[i][j]); } } int size1 = nItt * nP * sizeof(float); printf("\nThe array size in memory bytes is: %d\n",size1); cudaMalloc( (void**)&d_x_k, size1 ); cudaMalloc( (void**)&d_w_k, size1 ); if((nP*nItt)<32) { blocksize = nP*nItt; nblocks = 1; } else { blocksize = 32; // Defines the number of threads running per block. Taken equal to warp size nthreads = blocksize; nblocks = ceil(float(nP*nItt) / nthreads); // Calculated total number of blocks thus required } for(int i = 0; i< nItt; i++) { cudaMemcpy( d_x_k, x_k, size1,cudaMemcpyHostToDevice ); //copy of x_k to device cudaMemcpy( d_w_k, w_k, size1,cudaMemcpyHostToDevice ); //copy of w_k to device arr_chk<<<nblocks, blocksize>>>(d_x_k,d_w_k,i); cudaMemcpy( x_k, d_x_k, size1, cudaMemcpyDeviceToHost ); cudaMemcpy( w_k, d_w_k, size1, cudaMemcpyDeviceToHost ); } printf("\nVerification after return from gpu\n"); for(int i = 0; i<nItt; i++) { for(int j=0;j<nP;j++) { printf("x_k[%d][%d] = %f\t",i,j,x_k[i][j]); printf("w_k[%d][%d] = %f\n",i,j,w_k[i][j]); } } cudaFree( d_x_k ); cudaFree( d_w_k ); cleanup2d(x_k,nItt); cleanup2d(w_k,nItt); getch(); return 0;

Search Results

Search found 22043 results on 882 pages for 'int ua'.

Page 35/882 | < Previous Page | 31 32 33 34 35 36 37 38 39 40 41 42 | Next Page >

- by Soumya

- by SigTerm

- by YoungSalafi

- by Mike Sawayda

- by Reed

- by Adrian Marszalek

- by Jalpesh P. Vadgama

- by Jixi

- by Vee

- by user121917

- by user1905192

- by Zach

- by Alberto

- by Darryl Gove

- by DigiMortal

- by Hemant

- by Josh

- by gam3

- by Pete

- by xaliap81

- by slomojo

- by MrPlosion

- by ???? ?????

- by Prog

- by Ganesh Kundapur

< Previous Page | 31 32 33 34 35 36 37 38 39 40 41 42 | Next Page >