By Bakus123


2015-05-06 16:55:20 8 Comments

I have problem with passing array of struct to gpu kernel. I based on this topic - cudaMemcpy segmentation fault and I wrote sth like this:

#include <stdio.h>
#include <stdlib.h>

struct Test {
    char *array;
};

__global__ void kernel(Test *dev_test) {
    for(int i=0; i < 5; i++) {
        printf("Kernel[0][i]: %c \n", dev_test[0].array[i]);
    }
}

int main(void) {

    int n = 4, size = 5;
    Test *dev_test, *test;

    test = (Test*)malloc(sizeof(Test)*n);
    for(int i = 0; i < n; i++)
        test[i].array = (char*)malloc(size * sizeof(char));

    for(int i=0; i < n; i++) {
        char temp[] = { 'a', 'b', 'c', 'd' , 'e' };
        memcpy(test[i].array, temp, size * sizeof(char));
    }

    cudaMalloc((void**)&dev_test, n * sizeof(Test));
    cudaMemcpy(dev_test, test, n * sizeof(Test), cudaMemcpyHostToDevice);
    for(int i=0; i < n; i++) {
        cudaMalloc((void**)&(test[i].array), size * sizeof(char));
        cudaMemcpy(&(dev_test[i].array), &(test[i].array), size * sizeof(char), cudaMemcpyHostToDevice);
    }

    kernel<<<1, 1>>>(dev_test);
    cudaDeviceSynchronize();

    //  memory free
    return 0;
}

There is no error, but displayed values in kernel are incorrect. What I'm doing wrong? Thank in advance for any help.

1 comments

@Robert Crovella 2015-05-06 18:11:18

  1. This is allocating a new pointer to host memory:

    test[i].array = (char*)malloc(size * sizeof(char));
    
  2. This is copying data to that region in host memory:

    memcpy(test[i].array, temp, size * sizeof(char));
    
  3. This is overwriting the previously allocated pointer to host memory (from step 1 above) with a new pointer to device memory:

    cudaMalloc((void**)&(test[i].array), size * sizeof(char));
    

After step 3, the data you set up in step 2 is entirely lost, and no longer accessible in any fashion. Referring to steps 3 and 4 in the question/answer you linked:

3.Create a separate int pointer on the host, let's call it myhostptr

4.cudaMalloc int storage on the device for myhostptr

You haven't done this. You did not create a separate pointer. You reused (erased, overwrote) an existing pointer, which was pointing to data you cared about on the host. This question/answer, also linked from the answer you linked, gives almost exactly the steps you need to follow, in code.

Here's a modified version of your code, which properly implements the missing steps 3 and 4 (and 5) that you didn't implement correctly according to the question/answer you linked: (refer to comments delineating steps 3,4,5)

$ cat t755.cu
#include <stdio.h>
#include <stdlib.h>

struct Test {
    char *array;
};

__global__ void kernel(Test *dev_test) {
    for(int i=0; i < 5; i++) {
        printf("Kernel[0][i]: %c \n", dev_test[0].array[i]);
    }
}

int main(void) {

    int n = 4, size = 5;
    Test *dev_test, *test;

    test = (Test*)malloc(sizeof(Test)*n);
    for(int i = 0; i < n; i++)
        test[i].array = (char*)malloc(size * sizeof(char));

    for(int i=0; i < n; i++) {
        char temp[] = { 'a', 'b', 'c', 'd' , 'e' };
        memcpy(test[i].array, temp, size * sizeof(char));
    }

    cudaMalloc((void**)&dev_test, n * sizeof(Test));
    cudaMemcpy(dev_test, test, n * sizeof(Test), cudaMemcpyHostToDevice);

    // Step 3:
    char *temp_data[n];
    // Step 4:
    for (int i=0; i < n; i++)
      cudaMalloc(&(temp_data[i]), size*sizeof(char));
    // Step 5:
    for (int i=0; i < n; i++)
      cudaMemcpy(&(dev_test[i].array), &(temp_data[i]), sizeof(char *), cudaMemcpyHostToDevice);
    // now copy the embedded data:
    for (int i=0; i < n; i++)
      cudaMemcpy(temp_data[i], test[i].array, size*sizeof(char), cudaMemcpyHostToDevice);

    kernel<<<1, 1>>>(dev_test);
    cudaDeviceSynchronize();

    //  memory free
    return 0;
}

$ nvcc -o t755 t755.cu
$ cuda-memcheck ./t755
========= CUDA-MEMCHECK
Kernel[0][i]: a
Kernel[0][i]: b
Kernel[0][i]: c
Kernel[0][i]: d
Kernel[0][i]: e
========= ERROR SUMMARY: 0 errors
$

Since the above methodology can be challenging for beginners, the usual advice is not to do it, but instead flatten your data structures. Flatten generally means to rearrange the data storage so as to remove the embedded pointers that have to be separately allocated.

A trivial example of flattening this data structure would be to use this instead:

struct Test {
    char array[5];
};

It's recognized of course that this particular approach would not serve every purpose, but it should illustrate the general idea/intent. With that modification, as an example, the code becomes much simpler:

$ cat t755.cu
#include <stdio.h>
#include <stdlib.h>

struct Test {
    char array[5];
};

__global__ void kernel(Test *dev_test) {
    for(int i=0; i < 5; i++) {
        printf("Kernel[0][i]: %c \n", dev_test[0].array[i]);
    }
}

int main(void) {

    int n = 4, size = 5;
    Test *dev_test, *test;

    test = (Test*)malloc(sizeof(Test)*n);

    for(int i=0; i < n; i++) {
        char temp[] = { 'a', 'b', 'c', 'd' , 'e' };
        memcpy(test[i].array, temp, size * sizeof(char));
    }

    cudaMalloc((void**)&dev_test, n * sizeof(Test));
    cudaMemcpy(dev_test, test, n * sizeof(Test), cudaMemcpyHostToDevice);

    kernel<<<1, 1>>>(dev_test);
    cudaDeviceSynchronize();

    //  memory free
    return 0;
}
$ nvcc -o t755 t755.cu
$ cuda-memcheck ./t755
========= CUDA-MEMCHECK
Kernel[0][i]: a
Kernel[0][i]: b
Kernel[0][i]: c
Kernel[0][i]: d
Kernel[0][i]: e
========= ERROR SUMMARY: 0 errors
$

@Bakus123 2015-05-06 18:24:48

thanks a lot. What do you mean by "flatten your data structures"?

@Robert Crovella 2015-05-06 18:43:34

updated my answer to respond to this question. However if you search on the CUDA tag you will find many references and examples for "flattening".

Related Questions

Sponsored Content

25 Answered Questions

[SOLVED] When should you use a class vs a struct in C++?

  • 2008-09-10 16:29:54
  • Alan Hinchcliffe
  • 383304 View
  • 897 Score
  • 25 Answer
  • Tags:   c++ oop class struct ooad

18 Answered Questions

[SOLVED] With arrays, why is it the case that a[5] == 5[a]?

28 Answered Questions

[SOLVED] When to use struct?

  • 2009-02-06 17:37:55
  • Alex Baranosky
  • 260061 View
  • 1352 Score
  • 28 Answer
  • Tags:   c# struct

11 Answered Questions

[SOLVED] Why isn't sizeof for a struct equal to the sum of sizeof of each member?

8 Answered Questions

[SOLVED] Difference between 'struct' and 'typedef struct' in C++?

  • 2009-03-04 20:41:12
  • criddell
  • 496850 View
  • 803 Score
  • 8 Answer
  • Tags:   c++ struct typedef

3 Answered Questions

3 Answered Questions

[SOLVED] Increasing The Size of Memory Allocated to a Struct via Malloc

  • 2011-09-15 19:06:45
  • Daniel Scocco
  • 5161 View
  • 3 Score
  • 3 Answer
  • Tags:   c struct malloc

1 Answered Questions

[SOLVED] MPI dynamic array using malloc

  • 2012-11-15 16:44:49
  • rkrara
  • 1063 View
  • 1 Score
  • 1 Answer
  • Tags:   c arrays

Sponsored Content