In my code, I have a statically allocated array in global memory (i.e., allocated using __device__), which I want to sort using thrust::sort, which isn't working. All of the examples on this topic are using CUDA runtime allocated arrays (using cudaMalloc). Is there any way I can sort a statically allocated array?
I guess it has something to do with statically allocated memory not being accessible from the host. Using cudaMalloc-allocated arrays, it is working fine. However, I want to avoid using this type of allocation since static allocation allows for easier access to the data from device code (doesn't it?).
Minimal (not-) working example:
#include <stdio.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#define N 4
typedef struct element {
int key;
int value;
__host__ __device__ bool operator<(element e) const
{ return key > e.key; }
} element;
__device__ element array[N];
__global__ void init() {
for (int i = 0; i < N; ++i) {
array[N - i - 1].key = i;
}
}
__global__ void print_array() {
for (int i = 0; i < N; ++i) {
printf("%d ", array[i].key);
}
printf("\n");
}
int main(void) {
thrust::device_ptr<element> array_first(array);
init<<<1,1>>>();
printf("unsorted: ");
print_array<<<1, 1>>>();
cudaDeviceSynchronize();
thrust::sort(array_first, array_first + N);
printf("sorted: ");
print_array<<<1, 1>>>();
cudaDeviceSynchronize();
}
__device__variable in that manner from a__host__function.cudaGetSymbolAddresscall would be required.