/* * clTest.c -- * * Run a trivial OpenCL kernel and verify its results. * * [14 August 2009, Jeremy Sugerman] * */ #include #include #include #include #include #include #include #include #include #include "CL/cl.h" #define Warning(...) fprintf(stderr, __VA_ARGS__) typedef struct Opts { size_t numInstances; /* * Boolean options below here, all default to false (zero). */ int verify; int timing; int quiet; } Opts; /* * Sigh. OpenCL is designed to dump micromanagement onto the application. * It has a ton of context-like state that any reasonable implementation has * to drag around. In addition to the context, there's at least the list of * devices. For most simple cases, it makes lump the command queue (or all * the command queues) in with the context, too. */ typedef struct CLGoo { cl_context ctx; cl_device_id *devices; cl_uint numDevices; /* Could also have one queue per device and round-robin or something */ cl_command_queue queue; } CLGoo; /* * Usage -- * * Prints the usage message and exits. * * Results: * void, but calls exit(1)... */ static void Usage(const char *progName) { Warning("Usage: %s [options]\n", progName); Warning("Options:\n"); Warning(" -h, --help This message\n"); Warning(" -q, --quiet Don't dump the kernel output buffer\n"); Warning(" -s, --size Number of threads to run\n"); Warning(" -t, --timing Gather OpenCL profiling information\n"); Warning(" -v, --verify Check the output matches expectations\n"); exit(1); } /* * ParseOpts -- * * Converts the commandline parameters into their internal * representation. * * Results: * void, opts is initialized. */ static void ParseOpts(Opts *opts, int argc, char *argv[]) { int opt; static struct option longOptions[] = { {"help", 0, 0, 'h'}, {"quiet", 0, 0, 'q'}, {"size", 1, 0, 's'}, {"timing", 0, 0, 't'}, {"verify", 0, 0, 'v'}, }; while ((opt = getopt_long(argc, argv, "hqs:tv", longOptions, NULL)) != EOF) { switch(opt) { case 'q': opts->quiet = 1; break; case 's': opts->numInstances = strtoul(optarg, NULL, 0); break; case 't': opts->timing = 1; break; case 'v': opts->verify = 1; break; case 'h': default: Usage(argv[0]); break; } } return; } /* * FileToString -- * * Utility function that reads the specified file and returns a string * containing its contents. * * Results: * stringified version of the file on success, else NULL. */ static char * FileToString(const char *fileName) { size_t numBytes, numRead; struct stat fileInfo; char *contents; FILE *file; if (stat(fileName, &fileInfo) != 0) { Warning("FileToString: Unable to stat %s: %s\n", fileName, strerror(errno)); return NULL; } if ((file = fopen(fileName, "rb")) == NULL) { Warning("FileToString: Unable to open %s: %s\n", fileName, strerror(errno)); return NULL; } numBytes = fileInfo.st_size + 1; if ((contents = (char *) malloc(numBytes)) == NULL) { Warning("FileToString: Unable to allocate %d bytes!\n", numBytes); return NULL; } if ((numRead = fread(contents, 1, fileInfo.st_size, file)) != fileInfo.st_size) { Warning("FileToString: Expected %d bytes, but only read %d!\n", numBytes, numRead); } contents[numRead] = '\0'; return contents; } /* * DumpBuffer -- * * Utility function that dumps an array of ints. * * Results: * void (but spews to stdout). */ static void DumpBuffer(const uint *buffer, size_t len) { size_t ii; for (ii = 0; ii < len; ii++) { if (ii % 10 == 0) { printf("\n%3d:", ii); } printf(" 0x%03x", buffer[ii]); } printf("\n"); } /* * StrCLError -- * * Utility function that converts an OpenCL status into a human * readable string. * * Results: * const char * pointer to a static string. */ static const char * StrCLError(cl_int status) { static struct { cl_int code; const char *msg; } error_table[] = { { CL_SUCCESS, "success" }, { CL_DEVICE_NOT_FOUND, "device not found", }, { CL_DEVICE_NOT_AVAILABLE, "device not available", }, { CL_COMPILER_NOT_AVAILABLE, "compiler not available", }, { CL_MEM_OBJECT_ALLOCATION_FAILURE, "mem object allocation failure", }, { CL_OUT_OF_RESOURCES, "out of resources", }, { CL_OUT_OF_HOST_MEMORY, "out of host memory", }, { CL_PROFILING_INFO_NOT_AVAILABLE, "profiling not available", }, { CL_MEM_COPY_OVERLAP, "memcopy overlaps", }, { CL_IMAGE_FORMAT_MISMATCH, "image format mismatch", }, { CL_IMAGE_FORMAT_NOT_SUPPORTED, "image format not supported", }, { CL_BUILD_PROGRAM_FAILURE, "build program failed", }, { CL_MAP_FAILURE, "map failed", }, { CL_INVALID_VALUE, "invalid value", }, { CL_INVALID_DEVICE_TYPE, "invalid device type", }, { 0, NULL }, }; static char unknown[25]; int ii; for (ii = 0; error_table[ii].msg != NULL; ii++) { if (error_table[ii].code == status) { return error_table[ii].msg; } } snprintf(unknown, sizeof unknown, "unknown error %d", status); return unknown; } /* * HandleCLError -- * * Error handler registered with cl_context's. It's not clear to me * there's anything interesting to be done here, but it's always nice * to know when errors have occurred. * * Results: * void. */ static void HandleCLError(const char *errInfo, const void *opaque, size_t opaqueSize, void *userData) { Warning("Unexpected OpenCL error: %s\n", errInfo); if (opaqueSize > 0) { int ii; Warning(" %d bytes of vendor data.", opaqueSize); for (ii = 0; ii < opaqueSize; ii++) { char c = ((const char *) opaque)[ii]; if (ii % 10 == 0) { Warning("\n %3d:", ii); } Warning(" 0x%02x %c", c, isprint(c) ? c : '.'); } Warning("\n"); } } /* * LoadKernel -- * * Utility function that does all the lifting required to produce a * kernel object corresponding to the requested kernel in the given * file. * * Results: * 0 on error, 1 on success. */ static int LoadKernel(cl_kernel *kernel, const char *fileName, const char *kernelName, CLGoo *goo) { const char *source = FileToString(fileName); size_t sourceSize[] = { strlen(source) }; cl_program program; cl_int status; program = clCreateProgramWithSource(goo->ctx, 1, &source, sourceSize, &status); if (status != CL_SUCCESS) { Warning("clCreateProgramWithSource failed: %s", StrCLError(status)); return 0; } status = clBuildProgram(program, 1, goo->devices, NULL, NULL, NULL); if (status != CL_SUCCESS) { Warning("clBuildProgram failed: %s", StrCLError(status)); return 0; } *kernel = clCreateKernel(program, kernelName, &status); if (status != CL_SUCCESS) { Warning("clCreateKernel(%s) failed: %s", kernelName, StrCLError(status)); return 0; } /* * This is an experiment. If reasonable people wrote the spec and * implemented the run-time, then I can release the program here and * the kernel will have either cached what it needs locally or * incremented the program ref count on its own behalf. If. */ if (clReleaseProgram(program) != CL_SUCCESS) { Warning("clReleaseProgram() failed. Bummer.\n"); /* Do nothing. This isn't a hard failure. */ } return 1; } /* * InitializeCL -- * * Context, Device list, Command Queue are set up. * Calls are made to set up OpenCL memory buffers that this program uses * and to load the programs into memory and get kernel handles. * Load and build OpenCL program and get kernel handles. * Set up OpenCL memory buffers used by this program. * * Results: * 1 on success, 0 on failure. */ static int InitializeCL(CLGoo *goo, const Opts *opts) { cl_command_queue_properties queueProps; size_t deviceListSize; cl_int status; memset(goo, 0, sizeof *goo); goo->ctx = clCreateContextFromType(NULL, CL_DEVICE_TYPE_DEFAULT, HandleCLError, NULL, &status); if (status != CL_SUCCESS) { Warning("clCreateContextFromType failed: %s", StrCLError(status)); return 0; } status = clGetContextInfo(goo->ctx, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); if (status != CL_SUCCESS) { Warning("clGetContextInfo failed: %s", StrCLError(status)); return 0; } goo->numDevices = deviceListSize / sizeof(cl_device_id); if ((goo->devices = (cl_device_id *) malloc(deviceListSize)) == NULL) { Warning("Failed to allocate memory (deviceList)."); return 0; } /* Now, get the device list data */ status = clGetContextInfo(goo->ctx, CL_CONTEXT_DEVICES, deviceListSize, goo->devices, NULL); if (status != CL_SUCCESS) { Warning("clGetGetContextInfo failed: %s", StrCLError(status)); return 0; } queueProps = opts->timing ? CL_QUEUE_PROFILING_ENABLE : 0; goo->queue = clCreateCommandQueue(goo->ctx, goo->devices[0], queueProps, &status); if (status != CL_SUCCESS) { Warning("clCreateCommandQueue failed: %s", StrCLError(status)); return 0; } return 1; } /* * RunTest -- * * Creates / sets up the CL kernel and its arguments, then runs it and * dumps the results. * * Results: * 1 on success, 0 on failure. *Lots* of side effects. */ static int RunTest(CLGoo *goo, const Opts *opts) { cl_kernel kernel; cl_mem bufHandle; cl_int status; cl_event event; cl_mem_flags flags; uint *buffer, numBytes; if (!LoadKernel(&kernel, "clTest-kernels.cl", "TestWriteTID", goo)) { return 0; } numBytes = sizeof *buffer * opts->numInstances; if ((buffer = (uint *) malloc(numBytes)) == NULL) { Warning("Unable to allocate %d bytes for %d instances\n", numBytes, opts->numInstances); return 0; } flags = CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR; bufHandle = clCreateBuffer(goo->ctx, flags, numBytes, buffer, &status); if (status != CL_SUCCESS) { Warning("clCreateBuffer failed: %s\n", StrCLError(status)); return 0; } status = clSetKernelArg(kernel, 0, sizeof(bufHandle), &bufHandle); if (status != CL_SUCCESS) { Warning("clSetKernelArg() failed to bind output buffer: %s\n", StrCLError(status)); return 0; } /* * Now actually run the darn thing. Everything to this point has been * setup (and everything after it is cleanup). Hooray. */ status = clEnqueueNDRangeKernel(goo->queue, kernel, 1, NULL, &opts->numInstances, NULL, 0, NULL, &event); if (status != CL_SUCCESS) { Warning("Failed to launch kernel: %s\n", StrCLError(status)); return 0; } if ((status = clWaitForEvents(1, &event)) != CL_SUCCESS) { Warning("clWaitForEvents() failed: %s\n", StrCLError(status)); clFinish(goo->queue); /* Resort to the sledgehammer. */ } if (opts->timing) { long long start, end; double total; status = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof start, &start, NULL); if (status != CL_SUCCESS) { Warning("clGetEventProfilingInfo(COMMAND_START) failed: %s\n", StrCLError(status)); start = 0; } status = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof end, &end, NULL); if (status != CL_SUCCESS) { Warning("clGetEventProfilingInfo(COMMAND_END) failed: %s\n", StrCLError(status)); end = 0; } total = (double)(end - start) / 1e6; /* Convert nanoseconds to msecs */ printf("Profiling: Total kernel time was %5.2f msecs.\n", total); } if (!opts->quiet){ /* * XXX Do I need to call clEnqueueMapBuffer() before reading buffer[]? * The specification does not read very clearly about how * CL_MEM_USE_HOST_PTR operates. It clearly states that the device can * cache the memory while running a kernel, but I don't see any statement * of obligation (or lack thereof) to update the host when a kernel * finishes. * * Personally, I think it would be a bit of a silly requirement to * enqueue and wait/finish a MapBuffer in addition to the kernel itself. * I mean, if I went to the trouble of using CL_MEM_USE_HOST_PTR then * presumably I was serious. * * Anyhow, this seems to work with the AMD CPU OpenCL libraries without * any additional synchronization. */ DumpBuffer(buffer, opts->numInstances); } if (opts->verify) { int ii, mismatch; for (mismatch = ii = 0; ii< opts->numInstances; ii++) { if (buffer[ii] != ii) { Warning("Verify: Mismatch on entry %d: Got %d, expected %d!\n", ii, buffer[ii], ii); mismatch = 1; } } if (mismatch) { exit(1); } } clReleaseEvent(event); clReleaseMemObject(bufHandle); free(buffer); clReleaseKernel(kernel); return 1; } /* * CleanupCL -- * * Releases all the general OpenCL state (context and all its * associated junk). * * Results: * void. * Memory is freed, inputs are trashed, chaos ensues. */ static void CleanupCL(CLGoo *goo) { /* Releases OpenCL resources (Context, Memory etc.) */ cl_int status; if (goo->queue != NULL) { if ((status = clReleaseCommandQueue(goo->queue)) != CL_SUCCESS) { Warning("clReleaseCommandQueue failed: %s", StrCLError(status)); } goo->queue = NULL; } free(goo->devices); /* free(NULL) is just fine. */ goo->numDevices = 0; if (goo->ctx != NULL) { if ((status = clReleaseContext(goo->ctx)) != CL_SUCCESS) { Warning("clReleaseContext failed: %s", StrCLError(status)); } goo->ctx = NULL; } return; } int main(int argc, char * argv[]) { Opts opts = { 128, /* numInstances */ }; CLGoo goo; ParseOpts(&opts, argc, argv); if (!InitializeCL(&goo, &opts)) { exit(1); } if (!RunTest(&goo, &opts)) { exit(1); } CleanupCL(&goo); return 0; }