CUEW: Update to latest version

It is now updated against CUDA Toolkit 7.5. Currently should be no functional
changes, just begin some ground work for the future.
This commit is contained in:
Sergey Sharybin 2015-11-25 23:27:40 +05:00
parent 816cdf262b
commit d2a822fe07
4 changed files with 122 additions and 54 deletions

View File

@ -101,7 +101,7 @@ int cuewCompilerVersion(void) {
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output));
strncat(output, buf, sizeof(output) - strlen(output) - 1);
}
}

View File

@ -276,7 +276,11 @@ def parse_files():
if line[0].isspace() and line.lstrip().startswith("#define"):
line = line[12:-1]
token = line.split()
if len(token) == 2 and token[1].endswith("_v2"):
if len(token) == 2 and (token[1].endswith("_v2") or
token[1].endswith("_v2)")):
if token[1].startswith('__CUDA_API_PTDS') or \
token[1].startswith('__CUDA_API_PTSZ'):
token[1] = token[1][16:-1]
DEFINES_V2.append(token)
v = FuncDefVisitor()
@ -560,7 +564,8 @@ def print_implementation():
if error in CUDA_ERRORS:
str = CUDA_ERRORS[error]
else:
str = error[11:]
temp = error[11:].replace('_', ' ')
str = temp[0] + temp[1:].lower()
print(" case %s: return \"%s\";" % (error, str))
print(" default: return \"Unknown CUDA error value\";")

View File

@ -27,13 +27,16 @@ extern "C" {
#define CUEW_VERSION_MAJOR 1
#define CUEW_VERSION_MINOR 2
#define CUDA_VERSION 6000
#define CUDA_VERSION 7050
#define CU_IPC_HANDLE_SIZE 64
#define CU_STREAM_LEGACY ((CUstream)0x1)
#define CU_STREAM_PER_THREAD ((CUstream)0x2)
#define CU_MEMHOSTALLOC_PORTABLE 0x01
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
#define CU_MEMHOSTREGISTER_PORTABLE 0x01
#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
#define CU_MEMHOSTREGISTER_IOMEMORY 0x04
#define CUDA_ARRAY3D_LAYERED 0x01
#define CUDA_ARRAY3D_2DARRAY 0x01
#define CUDA_ARRAY3D_SURFACE_LDST 0x02
@ -100,10 +103,16 @@ extern "C" {
#define cuCtxPushCurrent cuCtxPushCurrent_v2
#define cuStreamDestroy cuStreamDestroy_v2
#define cuEventDestroy cuEventDestroy_v2
#define cuLinkCreate cuLinkCreate_v2
#define cuLinkAddData cuLinkAddData_v2
#define cuLinkAddFile cuLinkAddFile_v2
#define cuMemHostRegister cuMemHostRegister_v2
#define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2
#define cuTexRefSetAddress2D cuTexRefSetAddress2D_v2
#define cuGLCtxCreate cuGLCtxCreate_v2
#define cuGLMapBufferObject cuGLMapBufferObject_v2
#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
#define cuGLGetDevices cuGLGetDevices_v2
/* Types. */
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
@ -172,6 +181,11 @@ typedef enum CUevent_flags_enum {
CU_EVENT_INTERPROCESS = 0x4,
} CUevent_flags;
typedef enum CUoccupancy_flags_enum {
CU_OCCUPANCY_DEFAULT = 0x0,
CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
} CUoccupancy_flags;
typedef enum CUarray_format_enum {
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
@ -381,7 +395,9 @@ typedef enum CUjit_target_enum {
CU_TARGET_COMPUTE_30 = 30,
CU_TARGET_COMPUTE_32 = 32,
CU_TARGET_COMPUTE_35 = 35,
CU_TARGET_COMPUTE_37 = 37,
CU_TARGET_COMPUTE_50 = 50,
CU_TARGET_COMPUTE_52 = 52,
} CUjit_target;
typedef enum CUjit_fallback_enum {
@ -474,6 +490,7 @@ typedef enum cudaError_enum {
CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
CUDA_ERROR_INVALID_PTX = 218,
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
CUDA_ERROR_INVALID_SOURCE = 300,
CUDA_ERROR_FILE_NOT_FOUND = 301,
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
@ -506,6 +523,7 @@ typedef enum cudaError_enum {
} CUresult;
typedef void* CUstreamCallback;
typedef size_t* CUoccupancyB2DSize;
typedef struct CUDA_MEMCPY2D_st {
size_t srcXInBytes;
@ -730,6 +748,11 @@ typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease(CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned flags);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned* flags, int* active);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset(CUdevice dev);
typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned flags, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
@ -737,6 +760,7 @@ typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned* flags);
typedef CUresult CUDAAPI tcuCtxSynchronize(void);
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
@ -757,9 +781,9 @@ typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod,
typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
typedef CUresult CUDAAPI tcuLinkCreate(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
typedef CUresult CUDAAPI tcuLinkAddFile(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned numOptions, CUjit_option* options, void* optionValues);
typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned numOptions, CUjit_option* options, void* optionValues);
typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut);
typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
@ -780,7 +804,7 @@ typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandl
typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned Flags);
typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemHostRegister(void* p, size_t bytesize, unsigned Flags);
typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned Flags);
typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
@ -828,6 +852,7 @@ typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipma
typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned numAttributes, CUpointer_attribute* attributes, void* data, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned Flags);
typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned flags, int priority);
typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
@ -858,6 +883,10 @@ typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned flags);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned flags);
typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned Flags);
typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned Flags);
typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
@ -900,14 +929,14 @@ typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resour
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned arrayIndex, unsigned mipLevel);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned flags);
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned flags);
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned count, CUgraphicsResource* resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGetExportTable(const void* ppExportTable, const CUuuid* pExportTableId);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned Flags);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned Flags);
typedef CUresult CUDAAPI tcuGLGetDevices(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned cudaDeviceCount, CUGLDeviceList deviceList);
typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned Flags, CUdevice device);
typedef CUresult CUDAAPI tcuGLInit(void);
typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
@ -931,6 +960,11 @@ extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
extern tcuDeviceGetProperties *cuDeviceGetProperties;
extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
extern tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
extern tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
extern tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
extern tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
extern tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
extern tcuCtxCreate_v2 *cuCtxCreate_v2;
extern tcuCtxDestroy_v2 *cuCtxDestroy_v2;
extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
@ -938,6 +972,7 @@ extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
extern tcuCtxSetCurrent *cuCtxSetCurrent;
extern tcuCtxGetCurrent *cuCtxGetCurrent;
extern tcuCtxGetDevice *cuCtxGetDevice;
extern tcuCtxGetFlags *cuCtxGetFlags;
extern tcuCtxSynchronize *cuCtxSynchronize;
extern tcuCtxSetLimit *cuCtxSetLimit;
extern tcuCtxGetLimit *cuCtxGetLimit;
@ -958,9 +993,9 @@ extern tcuModuleGetFunction *cuModuleGetFunction;
extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
extern tcuModuleGetTexRef *cuModuleGetTexRef;
extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
extern tcuLinkCreate *cuLinkCreate;
extern tcuLinkAddData *cuLinkAddData;
extern tcuLinkAddFile *cuLinkAddFile;
extern tcuLinkCreate_v2 *cuLinkCreate_v2;
extern tcuLinkAddData_v2 *cuLinkAddData_v2;
extern tcuLinkAddFile_v2 *cuLinkAddFile_v2;
extern tcuLinkComplete *cuLinkComplete;
extern tcuLinkDestroy *cuLinkDestroy;
extern tcuMemGetInfo_v2 *cuMemGetInfo_v2;
@ -981,7 +1016,7 @@ extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
extern tcuIpcGetMemHandle *cuIpcGetMemHandle;
extern tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
extern tcuMemHostRegister *cuMemHostRegister;
extern tcuMemHostRegister_v2 *cuMemHostRegister_v2;
extern tcuMemHostUnregister *cuMemHostUnregister;
extern tcuMemcpy *cuMemcpy;
extern tcuMemcpyPeer *cuMemcpyPeer;
@ -1029,6 +1064,7 @@ extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
extern tcuPointerGetAttribute *cuPointerGetAttribute;
extern tcuPointerSetAttribute *cuPointerSetAttribute;
extern tcuPointerGetAttributes *cuPointerGetAttributes;
extern tcuStreamCreate *cuStreamCreate;
extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
extern tcuStreamGetPriority *cuStreamGetPriority;
@ -1059,6 +1095,10 @@ extern tcuLaunch *cuLaunch;
extern tcuLaunchGrid *cuLaunchGrid;
extern tcuLaunchGridAsync *cuLaunchGridAsync;
extern tcuParamSetTexRef *cuParamSetTexRef;
extern tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
extern tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
extern tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
extern tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
extern tcuTexRefSetArray *cuTexRefSetArray;
extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
@ -1101,14 +1141,14 @@ extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
extern tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
extern tcuGraphicsMapResources *cuGraphicsMapResources;
extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
extern tcuGetExportTable *cuGetExportTable;
extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
extern tcuGLGetDevices *cuGLGetDevices;
extern tcuGLGetDevices_v2 *cuGLGetDevices_v2;
extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
extern tcuGLInit *cuGLInit;
extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject;

View File

@ -36,7 +36,7 @@
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibrary(path)
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
@ -70,6 +70,11 @@ tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
tcuDeviceGetAttribute *cuDeviceGetAttribute;
tcuDeviceGetProperties *cuDeviceGetProperties;
tcuDeviceComputeCapability *cuDeviceComputeCapability;
tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;
tcuCtxCreate_v2 *cuCtxCreate_v2;
tcuCtxDestroy_v2 *cuCtxDestroy_v2;
tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
@ -77,6 +82,7 @@ tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
tcuCtxSetCurrent *cuCtxSetCurrent;
tcuCtxGetCurrent *cuCtxGetCurrent;
tcuCtxGetDevice *cuCtxGetDevice;
tcuCtxGetFlags *cuCtxGetFlags;
tcuCtxSynchronize *cuCtxSynchronize;
tcuCtxSetLimit *cuCtxSetLimit;
tcuCtxGetLimit *cuCtxGetLimit;
@ -97,9 +103,9 @@ tcuModuleGetFunction *cuModuleGetFunction;
tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
tcuModuleGetTexRef *cuModuleGetTexRef;
tcuModuleGetSurfRef *cuModuleGetSurfRef;
tcuLinkCreate *cuLinkCreate;
tcuLinkAddData *cuLinkAddData;
tcuLinkAddFile *cuLinkAddFile;
tcuLinkCreate_v2 *cuLinkCreate_v2;
tcuLinkAddData_v2 *cuLinkAddData_v2;
tcuLinkAddFile_v2 *cuLinkAddFile_v2;
tcuLinkComplete *cuLinkComplete;
tcuLinkDestroy *cuLinkDestroy;
tcuMemGetInfo_v2 *cuMemGetInfo_v2;
@ -120,7 +126,7 @@ tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
tcuIpcGetMemHandle *cuIpcGetMemHandle;
tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
tcuMemHostRegister *cuMemHostRegister;
tcuMemHostRegister_v2 *cuMemHostRegister_v2;
tcuMemHostUnregister *cuMemHostUnregister;
tcuMemcpy *cuMemcpy;
tcuMemcpyPeer *cuMemcpyPeer;
@ -168,6 +174,7 @@ tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
tcuPointerGetAttribute *cuPointerGetAttribute;
tcuPointerSetAttribute *cuPointerSetAttribute;
tcuPointerGetAttributes *cuPointerGetAttributes;
tcuStreamCreate *cuStreamCreate;
tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
tcuStreamGetPriority *cuStreamGetPriority;
@ -198,6 +205,10 @@ tcuLaunch *cuLaunch;
tcuLaunchGrid *cuLaunchGrid;
tcuLaunchGridAsync *cuLaunchGridAsync;
tcuParamSetTexRef *cuParamSetTexRef;
tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
tcuTexRefSetArray *cuTexRefSetArray;
tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
@ -240,14 +251,14 @@ tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
tcuGraphicsMapResources *cuGraphicsMapResources;
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
tcuGetExportTable *cuGetExportTable;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuGLGetDevices *cuGLGetDevices;
tcuGLGetDevices_v2 *cuGLGetDevices_v2;
tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
tcuGLInit *cuGLInit;
tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
@ -328,6 +339,11 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
CUDA_LIBRARY_FIND(cuDeviceGetProperties);
CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset);
CUDA_LIBRARY_FIND(cuCtxCreate_v2);
CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
@ -335,6 +351,7 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
CUDA_LIBRARY_FIND(cuCtxGetCurrent);
CUDA_LIBRARY_FIND(cuCtxGetDevice);
CUDA_LIBRARY_FIND(cuCtxGetFlags);
CUDA_LIBRARY_FIND(cuCtxSynchronize);
CUDA_LIBRARY_FIND(cuCtxSetLimit);
CUDA_LIBRARY_FIND(cuCtxGetLimit);
@ -355,9 +372,9 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
CUDA_LIBRARY_FIND(cuModuleGetTexRef);
CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
CUDA_LIBRARY_FIND(cuLinkCreate);
CUDA_LIBRARY_FIND(cuLinkAddData);
CUDA_LIBRARY_FIND(cuLinkAddFile);
CUDA_LIBRARY_FIND(cuLinkCreate_v2);
CUDA_LIBRARY_FIND(cuLinkAddData_v2);
CUDA_LIBRARY_FIND(cuLinkAddFile_v2);
CUDA_LIBRARY_FIND(cuLinkComplete);
CUDA_LIBRARY_FIND(cuLinkDestroy);
CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
@ -378,7 +395,7 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
CUDA_LIBRARY_FIND(cuMemHostRegister);
CUDA_LIBRARY_FIND(cuMemHostRegister_v2);
CUDA_LIBRARY_FIND(cuMemHostUnregister);
CUDA_LIBRARY_FIND(cuMemcpy);
CUDA_LIBRARY_FIND(cuMemcpyPeer);
@ -426,6 +443,7 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
CUDA_LIBRARY_FIND(cuPointerGetAttribute);
CUDA_LIBRARY_FIND(cuPointerSetAttribute);
CUDA_LIBRARY_FIND(cuPointerGetAttributes);
CUDA_LIBRARY_FIND(cuStreamCreate);
CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
CUDA_LIBRARY_FIND(cuStreamGetPriority);
@ -456,6 +474,10 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuLaunchGrid);
CUDA_LIBRARY_FIND(cuLaunchGridAsync);
CUDA_LIBRARY_FIND(cuParamSetTexRef);
CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor);
CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags);
CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize);
CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags);
CUDA_LIBRARY_FIND(cuTexRefSetArray);
CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
@ -498,14 +520,14 @@ int cuewInit(void) {
CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2);
CUDA_LIBRARY_FIND(cuGraphicsMapResources);
CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
CUDA_LIBRARY_FIND(cuGetExportTable);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
CUDA_LIBRARY_FIND(cuGLGetDevices);
CUDA_LIBRARY_FIND(cuGLGetDevices_v2);
CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
CUDA_LIBRARY_FIND(cuGLInit);
CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
@ -528,10 +550,10 @@ const char *cuewErrorString(CUresult result) {
case CUDA_ERROR_OUT_OF_MEMORY: return "Out of memory";
case CUDA_ERROR_NOT_INITIALIZED: return "Driver not initialized";
case CUDA_ERROR_DEINITIALIZED: return "Driver deinitialized";
case CUDA_ERROR_PROFILER_DISABLED: return "PROFILER_DISABLED";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "PROFILER_NOT_INITIALIZED";
case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "PROFILER_ALREADY_STARTED";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "PROFILER_ALREADY_STOPPED";
case CUDA_ERROR_PROFILER_DISABLED: return "Profiler disabled";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "Profiler not initialized";
case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "Profiler already started";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "Profiler already stopped";
case CUDA_ERROR_NO_DEVICE: return "No CUDA-capable device available";
case CUDA_ERROR_INVALID_DEVICE: return "Invalid device";
case CUDA_ERROR_INVALID_IMAGE: return "Invalid kernel image";
@ -548,37 +570,38 @@ const char *cuewErrorString(CUresult result) {
case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "Mapped resource not available for access as a pointer";
case CUDA_ERROR_ECC_UNCORRECTABLE: return "Uncorrectable ECC error detected";
case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUlimit not supported by device";
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CONTEXT_ALREADY_IN_USE";
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "PEER_ACCESS_UNSUPPORTED";
case CUDA_ERROR_INVALID_PTX: return "INVALID_PTX";
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "Context already in use";
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "Peer access unsupported";
case CUDA_ERROR_INVALID_PTX: return "Invalid ptx";
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: return "Invalid graphics context";
case CUDA_ERROR_INVALID_SOURCE: return "Invalid source";
case CUDA_ERROR_FILE_NOT_FOUND: return "File not found";
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "Link to a shared object failed to resolve";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "Shared object initialization failed";
case CUDA_ERROR_OPERATING_SYSTEM: return "OPERATING_SYSTEM";
case CUDA_ERROR_OPERATING_SYSTEM: return "Operating system";
case CUDA_ERROR_INVALID_HANDLE: return "Invalid handle";
case CUDA_ERROR_NOT_FOUND: return "Not found";
case CUDA_ERROR_NOT_READY: return "CUDA not ready";
case CUDA_ERROR_ILLEGAL_ADDRESS: return "ILLEGAL_ADDRESS";
case CUDA_ERROR_ILLEGAL_ADDRESS: return "Illegal address";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "Launch exceeded resources";
case CUDA_ERROR_LAUNCH_TIMEOUT: return "Launch exceeded timeout";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "Launch with incompatible texturing";
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "PEER_ACCESS_ALREADY_ENABLED";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "PEER_ACCESS_NOT_ENABLED";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "PRIMARY_CONTEXT_ACTIVE";
case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CONTEXT_IS_DESTROYED";
case CUDA_ERROR_ASSERT: return "ASSERT";
case CUDA_ERROR_TOO_MANY_PEERS: return "TOO_MANY_PEERS";
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "HOST_MEMORY_ALREADY_REGISTERED";
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "HOST_MEMORY_NOT_REGISTERED";
case CUDA_ERROR_HARDWARE_STACK_ERROR: return "HARDWARE_STACK_ERROR";
case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "ILLEGAL_INSTRUCTION";
case CUDA_ERROR_MISALIGNED_ADDRESS: return "MISALIGNED_ADDRESS";
case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "INVALID_ADDRESS_SPACE";
case CUDA_ERROR_INVALID_PC: return "INVALID_PC";
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "Peer access already enabled";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "Peer access not enabled";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "Primary context active";
case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "Context is destroyed";
case CUDA_ERROR_ASSERT: return "Assert";
case CUDA_ERROR_TOO_MANY_PEERS: return "Too many peers";
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "Host memory already registered";
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "Host memory not registered";
case CUDA_ERROR_HARDWARE_STACK_ERROR: return "Hardware stack error";
case CUDA_ERROR_ILLEGAL_INSTRUCTION: return "Illegal instruction";
case CUDA_ERROR_MISALIGNED_ADDRESS: return "Misaligned address";
case CUDA_ERROR_INVALID_ADDRESS_SPACE: return "Invalid address space";
case CUDA_ERROR_INVALID_PC: return "Invalid pc";
case CUDA_ERROR_LAUNCH_FAILED: return "Launch failed";
case CUDA_ERROR_NOT_PERMITTED: return "NOT_PERMITTED";
case CUDA_ERROR_NOT_SUPPORTED: return "NOT_SUPPORTED";
case CUDA_ERROR_NOT_PERMITTED: return "Not permitted";
case CUDA_ERROR_NOT_SUPPORTED: return "Not supported";
case CUDA_ERROR_UNKNOWN: return "Unknown error";
default: return "Unknown CUDA error value";
}
@ -686,7 +709,7 @@ int cuewCompilerVersion(void) {
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output) - 1 );
strncat(output, buf, sizeof(output) - strlen(output) - 1);
}
}