Search Results

Search found 1 results on 1 pages for 'xhe8'.

Page 1/1 | 1 

  • How to copy the memeory allocated in device function back to main memory

    - by xhe8
    I have a CUDA program containing a host function and a device function Execute(). In the host function, I allocate a global memory output which will then be passed to the device function and used to store the address of the global memory allocated within the device function. I want to access the in-kernel allocated memory in the host function. The following is the code: #include <stdio.h> typedef struct { int * p; int num; } Structure_A; \__global__ void Execute(Structure_A *output); int main(){ Structure_A *output; cudaMalloc((void***)&output,sizeof(Structure_A)*1); dim3 dimBlockExecute(1,1); dim3 dimGridExecute(1,1); Execute<<<dimGridExecute,dimBlockExecute>>>(output); Structure_A * output_cpu; int * p_cpu; cudaError_t err; output_cpu= (Structure_A*)malloc(1); err=cudaMemcpy(output_cpu,output,sizeof(Structure_A),cudaMemcpyDeviceToHost); if( err != cudaSuccess) { printf("CUDA error a: %s\n", cudaGetErrorString(err)); exit(-1); } p_cpu=(int *)malloc(1); err=cudaMemcpy(p_cpu,output_cpu[0].p,sizeof(int),cudaMemcpyDeviceToHost); if( err != cudaSuccess) { printf("CUDA error b: %s\n", cudaGetErrorString(err)); exit(-1); } printf("output=(%d,%d)\n",output_cpu[0].num,p_cpu[0]); return 0; } \__global__ void Execute(Structure_A *output){ int thid=threadIdx.x; output[thid].p= (int*)malloc(thid+1); output[thid].num=(thid+1); output[thid].p[0]=5; } I can compile the program. But when I run it, I got a error showing that there is a invalid argument in the following memory copy function. "err=cudaMemcpy(p_cpu,output_cpu[0].p,sizeof(int),cudaMemcpyDeviceToHost);" CUDA version is 4.2. CUDA card: Tesla C2075 OS: x86_64 GNU/Linux

    Read the article

1