In the following program, I compile a kernel for the first device on the first platform:
// OpenCL C source of the `vectorAdd` kernel: each work-item whose global id
// along dimension 0 is below `length` writes C[i] = A[i] + B[i].
// The index is a size_t (the return type of get_global_id) so that it is not
// truncated to int and the bounds check against the unsigned `length` does
// not mix signed and unsigned operands.
const char* kernel_source_code = R"(
__kernel void vectorAdd(
__global float * __restrict C,
__global float const * __restrict A,
__global float const * __restrict B,
unsigned long length)
{
size_t i = get_global_id(0);
if (i < length)
C[i] = A[i] + B[i];
}
)";
cl_int status;
cl_platform platform_id;
status = clGetPlatformIDs(1, &platform_id, nullptr);
ensure_success(status);
cl_device device_id;
status = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, nullptr);
ensure_success(status);
cl_context context_id = clCreateContext(nullptr, 1, &device_id, nullptr, nullptr, &status);
ensure_success(status);
size_t length = strlen(kernel_source_code);
cl_program program_id = clCreateProgramWithSource(context_id, 1, &kernel_source_code, &length, &status);
ensure_success(status);
status = clCompileProgram(
program_id,
1, &device_id, // single device
"", // no special options
0, nullptr, nullptr, // num headers, header sources, header names
nullptr, nullptr // no callback
);
ensure_success(status);
cl_kernel kernel = clCreateKernel(program_id, "vectorAdd", &status);
ensure_success(status);
But the last API call, the kernel creation, fails with CL_INVALID_PROGRAM_EXECUTABLE, which means "there is no successfully built executable for [the] program".
Why am I getting this failure, if my compilation has succeeded?