mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-21 00:48:49 +03:00
Make zluda_dump more robust
This commit is contained in:
@ -2341,6 +2341,9 @@ extern_redirect! {
|
||||
dev: CUdevice,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuDeviceTotalMem(bytes: *mut usize, dev: CUdevice) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult;
|
||||
}
|
||||
@ -2405,6 +2408,13 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxCreate(
|
||||
pctx: *mut CUcontext,
|
||||
flags: ::std::os::raw::c_uint,
|
||||
dev: CUdevice,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxCreate_v2(
|
||||
pctx: *mut CUcontext,
|
||||
@ -2412,12 +2422,21 @@ extern_redirect! {
|
||||
dev: CUdevice,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxDestroy(ctx: CUcontext) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxPushCurrent(ctx: CUcontext) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxPopCurrent(pctx: *mut CUcontext) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
|
||||
}
|
||||
@ -2509,6 +2528,14 @@ extern_redirect_with! {
|
||||
) -> CUresult;
|
||||
super::cuModuleGetFunction;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuModuleGetGlobal(
|
||||
dptr: *mut CUdeviceptr,
|
||||
bytes: *mut usize,
|
||||
hmod: CUmodule,
|
||||
name: *const ::std::os::raw::c_char,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuModuleGetGlobal_v2(
|
||||
dptr: *mut CUdeviceptr,
|
||||
@ -2531,6 +2558,14 @@ extern_redirect! {
|
||||
name: *const ::std::os::raw::c_char,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuLinkCreate(
|
||||
numOptions: ::std::os::raw::c_uint,
|
||||
options: *mut CUjit_option,
|
||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||
stateOut: *mut CUlinkState,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuLinkCreate_v2(
|
||||
numOptions: ::std::os::raw::c_uint,
|
||||
@ -2539,6 +2574,18 @@ extern_redirect! {
|
||||
stateOut: *mut CUlinkState,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuLinkAddData(
|
||||
state: CUlinkState,
|
||||
type_: CUjitInputType,
|
||||
data: *mut ::std::os::raw::c_void,
|
||||
size: usize,
|
||||
name: *const ::std::os::raw::c_char,
|
||||
numOptions: ::std::os::raw::c_uint,
|
||||
options: *mut CUjit_option,
|
||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuLinkAddData_v2(
|
||||
state: CUlinkState,
|
||||
@ -2551,6 +2598,16 @@ extern_redirect! {
|
||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuLinkAddFile(
|
||||
state: CUlinkState,
|
||||
type_: CUjitInputType,
|
||||
path: *const ::std::os::raw::c_char,
|
||||
numOptions: ::std::os::raw::c_uint,
|
||||
options: *mut CUjit_option,
|
||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuLinkAddFile_v2(
|
||||
state: CUlinkState,
|
||||
@ -2571,13 +2628,29 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuLinkDestroy(state: CUlinkState) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemGetInfo(free: *mut usize, total: *mut usize) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
|
||||
}
|
||||
extern_redirect_with! {
|
||||
pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
|
||||
super::cuMemAlloc;
|
||||
}
|
||||
extern_redirect_with! {
|
||||
pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
|
||||
super::cuMemAlloc_v2;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemAllocPitch(
|
||||
dptr: *mut CUdeviceptr,
|
||||
pPitch: *mut usize,
|
||||
WidthInBytes: usize,
|
||||
Height: usize,
|
||||
ElementSizeBytes: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemAllocPitch_v2(
|
||||
dptr: *mut CUdeviceptr,
|
||||
@ -2587,9 +2660,19 @@ extern_redirect! {
|
||||
ElementSizeBytes: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemFree(dptr: CUdeviceptr) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemGetAddressRange(
|
||||
pbase: *mut CUdeviceptr,
|
||||
psize: *mut usize,
|
||||
dptr: CUdeviceptr,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemGetAddressRange_v2(
|
||||
pbase: *mut CUdeviceptr,
|
||||
@ -2597,18 +2680,29 @@ extern_redirect! {
|
||||
dptr: CUdeviceptr,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemAllocHost(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
extern_redirect_with! {
|
||||
pub fn cuMemHostAlloc(
|
||||
pp: *mut *mut ::std::os::raw::c_void,
|
||||
bytesize: usize,
|
||||
Flags: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
super::cuMemHostAlloc;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemHostGetDevicePointer(
|
||||
pdptr: *mut CUdeviceptr,
|
||||
p: *mut ::std::os::raw::c_void,
|
||||
Flags: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemHostGetDevicePointer_v2(
|
||||
@ -2669,6 +2763,13 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemHostRegister(
|
||||
p: *mut ::std::os::raw::c_void,
|
||||
bytesize: usize,
|
||||
Flags: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemHostRegister_v2(
|
||||
p: *mut ::std::os::raw::c_void,
|
||||
@ -2691,6 +2792,13 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoD(
|
||||
dstDevice: CUdeviceptr,
|
||||
srcHost: *const ::std::os::raw::c_void,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoD_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2698,6 +2806,13 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoH(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
srcDevice: CUdeviceptr,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoH_v2(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
@ -2705,6 +2820,13 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoD(
|
||||
dstDevice: CUdeviceptr,
|
||||
srcDevice: CUdeviceptr,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoD_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2712,6 +2834,14 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoA(
|
||||
dstArray: CUarray,
|
||||
dstOffset: usize,
|
||||
srcDevice: CUdeviceptr,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoA_v2(
|
||||
dstArray: CUarray,
|
||||
@ -2720,6 +2850,14 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoD(
|
||||
dstDevice: CUdeviceptr,
|
||||
srcArray: CUarray,
|
||||
srcOffset: usize,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoD_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2728,6 +2866,14 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoA(
|
||||
dstArray: CUarray,
|
||||
dstOffset: usize,
|
||||
srcHost: *const ::std::os::raw::c_void,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoA_v2(
|
||||
dstArray: CUarray,
|
||||
@ -2736,6 +2882,14 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoH(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
srcArray: CUarray,
|
||||
srcOffset: usize,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoH_v2(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
@ -2744,6 +2898,15 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoA(
|
||||
dstArray: CUarray,
|
||||
dstOffset: usize,
|
||||
srcArray: CUarray,
|
||||
srcOffset: usize,
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoA_v2(
|
||||
dstArray: CUarray,
|
||||
@ -2753,12 +2916,21 @@ extern_redirect! {
|
||||
ByteCount: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy2D(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy2DUnaligned(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy3D(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
|
||||
}
|
||||
@ -2783,6 +2955,14 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoDAsync(
|
||||
dstDevice: CUdeviceptr,
|
||||
srcHost: *const ::std::os::raw::c_void,
|
||||
ByteCount: usize,
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoDAsync_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2791,6 +2971,14 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoHAsync(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
srcDevice: CUdeviceptr,
|
||||
ByteCount: usize,
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoHAsync_v2(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
@ -2799,6 +2987,14 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoDAsync(
|
||||
dstDevice: CUdeviceptr,
|
||||
srcDevice: CUdeviceptr,
|
||||
ByteCount: usize,
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyDtoDAsync_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2807,6 +3003,15 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoAAsync(
|
||||
dstArray: CUarray,
|
||||
dstOffset: usize,
|
||||
srcHost: *const ::std::os::raw::c_void,
|
||||
ByteCount: usize,
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyHtoAAsync_v2(
|
||||
dstArray: CUarray,
|
||||
@ -2816,6 +3021,15 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoHAsync(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
srcArray: CUarray,
|
||||
srcOffset: usize,
|
||||
ByteCount: usize,
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpyAtoHAsync_v2(
|
||||
dstHost: *mut ::std::os::raw::c_void,
|
||||
@ -2825,19 +3039,36 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy2DAsync(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy3DAsync(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD8(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
|
||||
-> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
|
||||
-> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD16(
|
||||
dstDevice: CUdeviceptr,
|
||||
us: ::std::os::raw::c_ushort,
|
||||
N: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD16_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2845,10 +3076,23 @@ extern_redirect! {
|
||||
N: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD32(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
|
||||
-> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
|
||||
-> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD2D8(
|
||||
dstDevice: CUdeviceptr,
|
||||
dstPitch: usize,
|
||||
uc: ::std::os::raw::c_uchar,
|
||||
Width: usize,
|
||||
Height: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD2D8_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2858,6 +3102,15 @@ extern_redirect! {
|
||||
Height: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD2D16(
|
||||
dstDevice: CUdeviceptr,
|
||||
dstPitch: usize,
|
||||
us: ::std::os::raw::c_ushort,
|
||||
Width: usize,
|
||||
Height: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD2D16_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2867,6 +3120,15 @@ extern_redirect! {
|
||||
Height: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD2D32(
|
||||
dstDevice: CUdeviceptr,
|
||||
dstPitch: usize,
|
||||
ui: ::std::os::raw::c_uint,
|
||||
Width: usize,
|
||||
Height: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuMemsetD2D32_v2(
|
||||
dstDevice: CUdeviceptr,
|
||||
@ -2930,12 +3192,24 @@ extern_redirect! {
|
||||
hStream: CUstream,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArrayCreate(
|
||||
pHandle: *mut CUarray,
|
||||
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArrayCreate_v2(
|
||||
pHandle: *mut CUarray,
|
||||
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArrayGetDescriptor(
|
||||
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
|
||||
hArray: CUarray,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArrayGetDescriptor_v2(
|
||||
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
|
||||
@ -2957,12 +3231,24 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuArrayDestroy(hArray: CUarray) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArray3DCreate(
|
||||
pHandle: *mut CUarray,
|
||||
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArray3DCreate_v2(
|
||||
pHandle: *mut CUarray,
|
||||
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArray3DGetDescriptor(
|
||||
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
|
||||
hArray: CUarray,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuArray3DGetDescriptor_v2(
|
||||
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
|
||||
@ -3169,6 +3455,9 @@ extern_redirect! {
|
||||
flags: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuStreamBeginCapture(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
|
||||
}
|
||||
@ -3205,6 +3494,9 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuStreamDestroy(hStream: CUstream) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
|
||||
}
|
||||
@ -3244,6 +3536,9 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuEventDestroy(hEvent: CUevent) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult;
|
||||
}
|
||||
@ -3666,6 +3961,15 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuGraphInstantiate(
|
||||
phGraphExec: *mut CUgraphExec,
|
||||
hGraph: CUgraph,
|
||||
phErrorNode: *mut CUgraphNode,
|
||||
logBuffer: *mut ::std::os::raw::c_char,
|
||||
bufferSize: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuGraphInstantiate_v2(
|
||||
phGraphExec: *mut CUgraphExec,
|
||||
@ -3823,6 +4127,14 @@ extern_redirect! {
|
||||
Flags: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuTexRefSetAddress(
|
||||
ByteOffset: *mut usize,
|
||||
hTexRef: CUtexref,
|
||||
dptr: CUdeviceptr,
|
||||
bytes: usize,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuTexRefSetAddress_v2(
|
||||
ByteOffset: *mut usize,
|
||||
@ -3881,6 +4193,9 @@ extern_redirect! {
|
||||
extern_redirect! {
|
||||
pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuTexRefGetAddress(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
|
||||
}
|
||||
@ -4034,6 +4349,13 @@ extern_redirect! {
|
||||
resource: CUgraphicsResource,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuGraphicsResourceGetMappedPointer(
|
||||
pDevPtr: *mut CUdeviceptr,
|
||||
pSize: *mut usize,
|
||||
resource: CUgraphicsResource,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuGraphicsResourceGetMappedPointer_v2(
|
||||
pDevPtr: *mut CUdeviceptr,
|
||||
@ -4041,6 +4363,12 @@ extern_redirect! {
|
||||
resource: CUgraphicsResource,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuGraphicsResourceSetMapFlags(
|
||||
resource: CUgraphicsResource,
|
||||
flags: ::std::os::raw::c_uint,
|
||||
) -> CUresult;
|
||||
}
|
||||
extern_redirect! {
|
||||
pub fn cuGraphicsResourceSetMapFlags_v2(
|
||||
resource: CUgraphicsResource,
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
collections::{BTreeMap, HashMap},
|
||||
env,
|
||||
error::Error,
|
||||
ffi::{c_void, CStr},
|
||||
@ -24,6 +24,10 @@ use regex::Regex;
|
||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||
mod os;
|
||||
|
||||
// Marker values scanned for in the `extra` array of a kernel launch (see the
// loop in `dump_arguments`). They are small integers cast to pointers and are
// compared by value, never dereferenced.
//
// Terminates the `extra` array.
const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
|
||||
// The slot following this marker holds the pointer to the packed argument buffer.
const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
|
||||
// The slot following this marker holds a pointer to a `usize` with the buffer size.
const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _;
|
||||
|
||||
macro_rules! extern_redirect {
|
||||
(pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
|
||||
#[no_mangle]
|
||||
@ -68,11 +72,18 @@ mod cuda;
|
||||
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
|
||||
pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None;
|
||||
pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None;
|
||||
pub static mut BUFFERS: Vec<(usize, usize)> = Vec::new();
|
||||
static mut BUFFERS: Option<BTreeMap<usize, (usize, AllocLocation)>> = None;
|
||||
pub static mut LAUNCH_COUNTER: usize = 0;
|
||||
pub static mut KERNEL_PATTERN: Option<Regex> = None;
|
||||
pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
|
||||
|
||||
/// Records which allocation entry point produced a tracked buffer, so that the
/// dumping code can pick the matching routine for copying its contents out.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum AllocLocation {
    /// Device memory recorded by the `cuMemAlloc` hook.
    Device,
    /// Device memory recorded by the `cuMemAlloc_v2` hook.
    DeviceV2,
    /// Host memory recorded by the `cuMemHostAlloc` hook.
    Host,
}
|
||||
|
||||
pub struct ModuleDump {
|
||||
content: Rc<String>,
|
||||
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
||||
@ -88,6 +99,9 @@ pub struct KernelDump {
|
||||
// it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD
|
||||
pub unsafe fn init_libcuda_handle() {
|
||||
if LIBCUDA_HANDLE == ptr::null_mut() {
|
||||
MODULES = Some(HashMap::new());
|
||||
KERNELS = Some(HashMap::new());
|
||||
BUFFERS = Some(BTreeMap::new());
|
||||
let libcuda_handle = os::load_cuda_library();
|
||||
assert_ne!(libcuda_handle, ptr::null_mut());
|
||||
LIBCUDA_HANDLE = libcuda_handle;
|
||||
@ -162,8 +176,7 @@ unsafe fn record_module_image(module: CUmodule, image: &str) {
|
||||
None
|
||||
}
|
||||
};
|
||||
let modules = MODULES.get_or_insert_with(|| HashMap::new());
|
||||
modules.insert(
|
||||
MODULES.as_mut().unwrap().insert(
|
||||
module,
|
||||
ModuleDump {
|
||||
content: Rc::new(image.to_string()),
|
||||
@ -251,8 +264,7 @@ unsafe fn cuModuleGetFunction(
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
|
||||
kernel_args_map.insert(
|
||||
KERNELS.as_mut().unwrap().insert(
|
||||
*hfunc,
|
||||
KernelDump {
|
||||
module_content: module_dump.content.clone(),
|
||||
@ -272,16 +284,60 @@ unsafe fn cuModuleGetFunction(
|
||||
CUresult::CUDA_SUCCESS
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub unsafe fn cuMemAlloc(
|
||||
dptr: *mut CUdeviceptr,
|
||||
bytesize: usize,
|
||||
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
||||
) -> CUresult {
|
||||
cuMemAlloc_impl(false, dptr, bytesize, cont)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub unsafe fn cuMemAlloc_v2(
|
||||
dptr: *mut CUdeviceptr,
|
||||
bytesize: usize,
|
||||
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
||||
) -> CUresult {
|
||||
cuMemAlloc_impl(true, dptr, bytesize, cont)
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub unsafe fn cuMemAlloc_impl(
|
||||
is_v2: bool,
|
||||
dptr: *mut CUdeviceptr,
|
||||
bytesize: usize,
|
||||
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
||||
) -> CUresult {
|
||||
let result = cont(dptr, bytesize);
|
||||
assert_eq!(result, CUresult::CUDA_SUCCESS);
|
||||
let start = (*dptr).0 as usize;
|
||||
BUFFERS.push((start, bytesize));
|
||||
let location = if is_v2 {
|
||||
AllocLocation::DeviceV2
|
||||
} else {
|
||||
AllocLocation::Device
|
||||
};
|
||||
BUFFERS
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.insert(start, (bytesize, location));
|
||||
CUresult::CUDA_SUCCESS
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub unsafe fn cuMemHostAlloc(
|
||||
pp: *mut *mut c_void,
|
||||
bytesize: usize,
|
||||
flags: c_uint,
|
||||
cont: impl FnOnce(*mut *mut c_void, usize, c_uint) -> CUresult,
|
||||
) -> CUresult {
|
||||
let result = cont(pp, bytesize, flags);
|
||||
assert_eq!(result, CUresult::CUDA_SUCCESS);
|
||||
let start = (*pp) as usize;
|
||||
BUFFERS
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.insert(start, (bytesize, AllocLocation::Host));
|
||||
CUresult::CUDA_SUCCESS
|
||||
}
|
||||
|
||||
@ -330,6 +386,7 @@ pub unsafe fn cuLaunchKernel(
|
||||
blockDimZ,
|
||||
sharedMemBytes,
|
||||
kernelParams,
|
||||
extra,
|
||||
dump_env,
|
||||
)
|
||||
.unwrap_or_else(|err| os_log!("{}", err));
|
||||
@ -353,6 +410,7 @@ pub unsafe fn cuLaunchKernel(
|
||||
if let Some((_, kernel_dump)) = &dump_env {
|
||||
dump_arguments(
|
||||
kernelParams,
|
||||
extra,
|
||||
"post",
|
||||
&kernel_dump.name,
|
||||
LAUNCH_COUNTER,
|
||||
@ -423,6 +481,7 @@ unsafe fn dump_pre_data(
|
||||
blockDimZ: ::std::os::raw::c_uint,
|
||||
sharedMemBytes: ::std::os::raw::c_uint,
|
||||
kernelParams: *mut *mut ::std::os::raw::c_void,
|
||||
extra: *mut *mut ::std::os::raw::c_void,
|
||||
(dump_dir, kernel_dump): &(PathBuf, &'static KernelDump),
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
dump_launch_arguments(
|
||||
@ -441,6 +500,7 @@ unsafe fn dump_pre_data(
|
||||
module_file.write_all(kernel_dump.module_content.as_bytes())?;
|
||||
dump_arguments(
|
||||
kernelParams,
|
||||
extra,
|
||||
"pre",
|
||||
&kernel_dump.name,
|
||||
LAUNCH_COUNTER,
|
||||
@ -449,8 +509,9 @@ unsafe fn dump_pre_data(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn dump_arguments(
|
||||
fn dump_arguments(
|
||||
kernel_params: *mut *mut ::std::os::raw::c_void,
|
||||
extra: *mut *mut ::std::os::raw::c_void,
|
||||
prefix: &str,
|
||||
kernel_name: &str,
|
||||
counter: usize,
|
||||
@ -467,14 +528,84 @@ unsafe fn dump_arguments(
|
||||
fs::remove_dir_all(&dump_dir)?;
|
||||
}
|
||||
fs::create_dir_all(&dump_dir)?;
|
||||
if kernel_params != ptr::null_mut() {
|
||||
for (i, arg_len) in args.iter().enumerate() {
|
||||
let dev_ptr = *(*kernel_params.add(i) as *mut usize);
|
||||
match BUFFERS.iter().find(|(start, _)| *start == dev_ptr as usize) {
|
||||
Some((start, len)) => {
|
||||
let mut output = vec![0u8; *len];
|
||||
let error =
|
||||
cuda::cuMemcpyDtoH_v2(output.as_mut_ptr() as *mut _, CUdeviceptr(*start), *len);
|
||||
unsafe { dump_argument_to_file(&dump_dir, i, *arg_len, *kernel_params.add(i))? };
|
||||
}
|
||||
} else {
|
||||
let mut offset = 0;
|
||||
let mut buffer_ptr = None;
|
||||
let mut buffer_size = None;
|
||||
loop {
|
||||
match unsafe { *extra.add(offset) } {
|
||||
CU_LAUNCH_PARAM_END => break,
|
||||
CU_LAUNCH_PARAM_BUFFER_POINTER => {
|
||||
buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 });
|
||||
}
|
||||
CU_LAUNCH_PARAM_BUFFER_SIZE => {
|
||||
buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) });
|
||||
}
|
||||
_ => return Err("Malformed `extra` parameter to kernel launch")?,
|
||||
}
|
||||
offset += 2;
|
||||
}
|
||||
match (buffer_size, buffer_ptr) {
|
||||
(Some(buffer_size), Some(buffer_ptr)) => {
|
||||
let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
|
||||
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
|
||||
});
|
||||
if buffer_size != sum_of_kernel_argument_sizes {
|
||||
return Err("Malformed `extra` parameter to kernel launch")?;
|
||||
}
|
||||
let mut offset = 0;
|
||||
for (i, arg_size) in args.iter().enumerate() {
|
||||
let buffer_offset = round_up_to_multiple(offset, *arg_size);
|
||||
unsafe {
|
||||
dump_argument_to_file(
|
||||
&dump_dir,
|
||||
i,
|
||||
*arg_size,
|
||||
buffer_ptr.add(buffer_offset) as *const _,
|
||||
)?
|
||||
};
|
||||
offset = buffer_offset + *arg_size;
|
||||
}
|
||||
}
|
||||
_ => return Err("Malformed `extra` parameter to kernel launch")?,
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rounds `x` up to the nearest multiple of `multiple`.
///
/// A `multiple` of zero imposes no alignment, so `x` is returned unchanged
/// rather than dividing by zero. Values that are already aligned are returned
/// as-is without any intermediate addition, so they can never overflow.
fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
    if multiple == 0 {
        return x;
    }
    match x % multiple {
        0 => x,
        remainder => x + (multiple - remainder),
    }
}
|
||||
|
||||
unsafe fn dump_argument_to_file(
|
||||
dump_dir: &PathBuf,
|
||||
i: usize,
|
||||
arg_len: usize,
|
||||
ptr: *const c_void,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
// Don't check if arg_len == sizeof(void*), there are libraries
|
||||
// which for some reason pass 32 pointers (4 bytes) in 8 byte arguments
|
||||
match get_buffer_length(*(ptr as *mut usize)) {
|
||||
Some((start, len, location)) => {
|
||||
let mut output = vec![0u8; len];
|
||||
let memcpy_fn = match location {
|
||||
AllocLocation::Device => |src, dst: usize, len| {
|
||||
let error = cuda::cuMemcpyDtoH(dst as *mut _, CUdeviceptr(src), len);
|
||||
assert_eq!(error, CUresult::CUDA_SUCCESS);
|
||||
},
|
||||
AllocLocation::DeviceV2 => |src, dst: usize, len| {
|
||||
let error = cuda::cuMemcpyDtoH_v2(dst as *mut _, CUdeviceptr(src), len);
|
||||
assert_eq!(error, CUresult::CUDA_SUCCESS);
|
||||
},
|
||||
AllocLocation::Host => |src, dst: usize, len| {
|
||||
ptr::copy_nonoverlapping(src as *mut u8, dst as *mut u8, len);
|
||||
},
|
||||
};
|
||||
memcpy_fn(start, output.as_mut_ptr() as usize, len);
|
||||
let mut path = dump_dir.clone();
|
||||
path.push(format!("arg_{:03}.buffer", i));
|
||||
let mut file = File::create(path)?;
|
||||
@ -484,16 +615,28 @@ unsafe fn dump_arguments(
|
||||
let mut path = dump_dir.clone();
|
||||
path.push(format!("arg_{:03}", i));
|
||||
let mut file = File::create(path)?;
|
||||
file.write_all(slice::from_raw_parts(
|
||||
*kernel_params.add(i) as *mut u8,
|
||||
*arg_len,
|
||||
))?;
|
||||
}
|
||||
file.write_all(slice::from_raw_parts(ptr as *mut u8, arg_len))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn get_buffer_length(ptr: usize) -> Option<(usize, usize, AllocLocation)> {
|
||||
BUFFERS
|
||||
.as_mut()
|
||||
.unwrap()
|
||||
.range(..=ptr)
|
||||
.next_back()
|
||||
.and_then(|(start, (len, loc))| {
|
||||
let end = *start + *len;
|
||||
if ptr < end {
|
||||
Some((ptr, end - ptr, *loc))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> {
|
||||
let dir = env::var("ZLUDA_DUMP_DIR")?;
|
||||
let mut main_dir = PathBuf::from(dir);
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::os::windows::ffi::OsStrExt;
|
||||
use std::path::Path;
|
||||
use std::ptr;
|
||||
use std::{env, ops::Deref};
|
||||
@ -33,7 +34,7 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
|
||||
let injector_dir = injector_path.parent().unwrap();
|
||||
let redirect_path = create_redirect_path(injector_dir);
|
||||
let (mut inject_nvcuda_path, mut inject_nvml_path, cmd) =
|
||||
create_inject_path(&args[1..], injector_dir);
|
||||
create_inject_path(&args[1..], injector_dir)?;
|
||||
let mut cmd_line = construct_command_line(cmd);
|
||||
let mut startup_info = unsafe { mem::zeroed::<detours_sys::_STARTUPINFOW>() };
|
||||
let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() };
|
||||
@ -110,7 +111,7 @@ fn print_help_and_exit() -> ! {
|
||||
{0} -- <EXE> [ARGS]...
|
||||
{0} <DLL> -- <EXE> [ARGS]...
|
||||
ARGS:
|
||||
<DLL> DLL to ne injected instead of system nvcuda.dll, if not provided
|
||||
<DLL> DLL to be injected instead of system nvcuda.dll, if not provided
|
||||
will use nvcuda.dll from the directory where {0} is located
|
||||
<EXE> Path to the executable to be injected with <DLL>
|
||||
<ARGS>... Arguments that will be passed to <EXE>
|
||||
@ -187,7 +188,7 @@ fn create_redirect_path(injector_dir: &Path) -> Vec<u8> {
|
||||
fn create_inject_path<'a>(
|
||||
args: &'a [String],
|
||||
injector_dir: &Path,
|
||||
) -> (Vec<u16>, Vec<u16>, &'a [String]) {
|
||||
) -> std::io::Result<(Vec<u16>, Vec<u16>, &'a [String])> {
|
||||
let injector_dir = injector_dir.to_path_buf();
|
||||
let (nvcuda_path, unparsed_args) = if args.get(0).map(Deref::deref) == Some("--") {
|
||||
(
|
||||
@ -195,14 +196,13 @@ fn create_inject_path<'a>(
|
||||
&args[1..],
|
||||
)
|
||||
} else if args.get(1).map(Deref::deref) == Some("--") {
|
||||
let mut dll_path = args[0].encode_utf16().collect::<Vec<_>>();
|
||||
dll_path.push(0);
|
||||
let dll_path = make_absolute_and_encode(&args[0])?;
|
||||
(dll_path, &args[2..])
|
||||
} else {
|
||||
print_help_and_exit()
|
||||
};
|
||||
let nvml_path = encode_file_in_directory_raw(injector_dir, ZLUDA_ML_DLL);
|
||||
(nvcuda_path, nvml_path, unparsed_args)
|
||||
Ok((nvcuda_path, nvml_path, unparsed_args))
|
||||
}
|
||||
|
||||
fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16> {
|
||||
@ -215,3 +215,16 @@ fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16
|
||||
result.push(0);
|
||||
result
|
||||
}
|
||||
|
||||
fn make_absolute_and_encode(maybe_path: &str) -> std::io::Result<Vec<u16>> {
|
||||
let path = Path::new(maybe_path);
|
||||
let mut encoded_path = if path.is_relative() {
|
||||
let mut current_dir = env::current_dir()?;
|
||||
current_dir.push(path);
|
||||
current_dir.as_os_str().encode_wide().collect::<Vec<_>>()
|
||||
} else {
|
||||
maybe_path.encode_utf16().collect::<Vec<_>>()
|
||||
};
|
||||
encoded_path.push(0);
|
||||
Ok(encoded_path)
|
||||
}
|
||||
|
Reference in New Issue
Block a user