mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-21 08:58:53 +03:00
Make zluda_dump more robust
This commit is contained in:
@ -2341,6 +2341,9 @@ extern_redirect! {
|
|||||||
dev: CUdevice,
|
dev: CUdevice,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuDeviceTotalMem(bytes: *mut usize, dev: CUdevice) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult;
|
pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -2405,6 +2408,13 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult;
|
pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuCtxCreate(
|
||||||
|
pctx: *mut CUcontext,
|
||||||
|
flags: ::std::os::raw::c_uint,
|
||||||
|
dev: CUdevice,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuCtxCreate_v2(
|
pub fn cuCtxCreate_v2(
|
||||||
pctx: *mut CUcontext,
|
pctx: *mut CUcontext,
|
||||||
@ -2412,12 +2422,21 @@ extern_redirect! {
|
|||||||
dev: CUdevice,
|
dev: CUdevice,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuCtxDestroy(ctx: CUcontext) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
|
pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuCtxPushCurrent(ctx: CUcontext) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
|
pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuCtxPopCurrent(pctx: *mut CUcontext) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
|
pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -2509,6 +2528,14 @@ extern_redirect_with! {
|
|||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
super::cuModuleGetFunction;
|
super::cuModuleGetFunction;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuModuleGetGlobal(
|
||||||
|
dptr: *mut CUdeviceptr,
|
||||||
|
bytes: *mut usize,
|
||||||
|
hmod: CUmodule,
|
||||||
|
name: *const ::std::os::raw::c_char,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuModuleGetGlobal_v2(
|
pub fn cuModuleGetGlobal_v2(
|
||||||
dptr: *mut CUdeviceptr,
|
dptr: *mut CUdeviceptr,
|
||||||
@ -2531,6 +2558,14 @@ extern_redirect! {
|
|||||||
name: *const ::std::os::raw::c_char,
|
name: *const ::std::os::raw::c_char,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuLinkCreate(
|
||||||
|
numOptions: ::std::os::raw::c_uint,
|
||||||
|
options: *mut CUjit_option,
|
||||||
|
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||||
|
stateOut: *mut CUlinkState,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuLinkCreate_v2(
|
pub fn cuLinkCreate_v2(
|
||||||
numOptions: ::std::os::raw::c_uint,
|
numOptions: ::std::os::raw::c_uint,
|
||||||
@ -2539,6 +2574,18 @@ extern_redirect! {
|
|||||||
stateOut: *mut CUlinkState,
|
stateOut: *mut CUlinkState,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuLinkAddData(
|
||||||
|
state: CUlinkState,
|
||||||
|
type_: CUjitInputType,
|
||||||
|
data: *mut ::std::os::raw::c_void,
|
||||||
|
size: usize,
|
||||||
|
name: *const ::std::os::raw::c_char,
|
||||||
|
numOptions: ::std::os::raw::c_uint,
|
||||||
|
options: *mut CUjit_option,
|
||||||
|
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuLinkAddData_v2(
|
pub fn cuLinkAddData_v2(
|
||||||
state: CUlinkState,
|
state: CUlinkState,
|
||||||
@ -2551,6 +2598,16 @@ extern_redirect! {
|
|||||||
optionValues: *mut *mut ::std::os::raw::c_void,
|
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuLinkAddFile(
|
||||||
|
state: CUlinkState,
|
||||||
|
type_: CUjitInputType,
|
||||||
|
path: *const ::std::os::raw::c_char,
|
||||||
|
numOptions: ::std::os::raw::c_uint,
|
||||||
|
options: *mut CUjit_option,
|
||||||
|
optionValues: *mut *mut ::std::os::raw::c_void,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuLinkAddFile_v2(
|
pub fn cuLinkAddFile_v2(
|
||||||
state: CUlinkState,
|
state: CUlinkState,
|
||||||
@ -2571,13 +2628,29 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuLinkDestroy(state: CUlinkState) -> CUresult;
|
pub fn cuLinkDestroy(state: CUlinkState) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemGetInfo(free: *mut usize, total: *mut usize) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
|
pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect_with! {
|
||||||
|
pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
|
||||||
|
super::cuMemAlloc;
|
||||||
|
}
|
||||||
extern_redirect_with! {
|
extern_redirect_with! {
|
||||||
pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
|
pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
|
||||||
super::cuMemAlloc_v2;
|
super::cuMemAlloc_v2;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemAllocPitch(
|
||||||
|
dptr: *mut CUdeviceptr,
|
||||||
|
pPitch: *mut usize,
|
||||||
|
WidthInBytes: usize,
|
||||||
|
Height: usize,
|
||||||
|
ElementSizeBytes: ::std::os::raw::c_uint,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemAllocPitch_v2(
|
pub fn cuMemAllocPitch_v2(
|
||||||
dptr: *mut CUdeviceptr,
|
dptr: *mut CUdeviceptr,
|
||||||
@ -2587,9 +2660,19 @@ extern_redirect! {
|
|||||||
ElementSizeBytes: ::std::os::raw::c_uint,
|
ElementSizeBytes: ::std::os::raw::c_uint,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemFree(dptr: CUdeviceptr) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
|
pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemGetAddressRange(
|
||||||
|
pbase: *mut CUdeviceptr,
|
||||||
|
psize: *mut usize,
|
||||||
|
dptr: CUdeviceptr,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemGetAddressRange_v2(
|
pub fn cuMemGetAddressRange_v2(
|
||||||
pbase: *mut CUdeviceptr,
|
pbase: *mut CUdeviceptr,
|
||||||
@ -2597,18 +2680,29 @@ extern_redirect! {
|
|||||||
dptr: CUdeviceptr,
|
dptr: CUdeviceptr,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemAllocHost(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
|
pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
|
||||||
}
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
|
pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
|
||||||
}
|
}
|
||||||
extern_redirect! {
|
extern_redirect_with! {
|
||||||
pub fn cuMemHostAlloc(
|
pub fn cuMemHostAlloc(
|
||||||
pp: *mut *mut ::std::os::raw::c_void,
|
pp: *mut *mut ::std::os::raw::c_void,
|
||||||
bytesize: usize,
|
bytesize: usize,
|
||||||
Flags: ::std::os::raw::c_uint,
|
Flags: ::std::os::raw::c_uint,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
|
super::cuMemHostAlloc;
|
||||||
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemHostGetDevicePointer(
|
||||||
|
pdptr: *mut CUdeviceptr,
|
||||||
|
p: *mut ::std::os::raw::c_void,
|
||||||
|
Flags: ::std::os::raw::c_uint,
|
||||||
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemHostGetDevicePointer_v2(
|
pub fn cuMemHostGetDevicePointer_v2(
|
||||||
@ -2669,6 +2763,13 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult;
|
pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemHostRegister(
|
||||||
|
p: *mut ::std::os::raw::c_void,
|
||||||
|
bytesize: usize,
|
||||||
|
Flags: ::std::os::raw::c_uint,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemHostRegister_v2(
|
pub fn cuMemHostRegister_v2(
|
||||||
p: *mut ::std::os::raw::c_void,
|
p: *mut ::std::os::raw::c_void,
|
||||||
@ -2691,6 +2792,13 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyHtoD(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
srcHost: *const ::std::os::raw::c_void,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyHtoD_v2(
|
pub fn cuMemcpyHtoD_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2698,6 +2806,13 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyDtoH(
|
||||||
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
|
srcDevice: CUdeviceptr,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyDtoH_v2(
|
pub fn cuMemcpyDtoH_v2(
|
||||||
dstHost: *mut ::std::os::raw::c_void,
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
@ -2705,6 +2820,13 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyDtoD(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
srcDevice: CUdeviceptr,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyDtoD_v2(
|
pub fn cuMemcpyDtoD_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2712,6 +2834,14 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyDtoA(
|
||||||
|
dstArray: CUarray,
|
||||||
|
dstOffset: usize,
|
||||||
|
srcDevice: CUdeviceptr,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyDtoA_v2(
|
pub fn cuMemcpyDtoA_v2(
|
||||||
dstArray: CUarray,
|
dstArray: CUarray,
|
||||||
@ -2720,6 +2850,14 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyAtoD(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
srcArray: CUarray,
|
||||||
|
srcOffset: usize,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyAtoD_v2(
|
pub fn cuMemcpyAtoD_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2728,6 +2866,14 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyHtoA(
|
||||||
|
dstArray: CUarray,
|
||||||
|
dstOffset: usize,
|
||||||
|
srcHost: *const ::std::os::raw::c_void,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyHtoA_v2(
|
pub fn cuMemcpyHtoA_v2(
|
||||||
dstArray: CUarray,
|
dstArray: CUarray,
|
||||||
@ -2736,6 +2882,14 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyAtoH(
|
||||||
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
|
srcArray: CUarray,
|
||||||
|
srcOffset: usize,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyAtoH_v2(
|
pub fn cuMemcpyAtoH_v2(
|
||||||
dstHost: *mut ::std::os::raw::c_void,
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
@ -2744,6 +2898,15 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyAtoA(
|
||||||
|
dstArray: CUarray,
|
||||||
|
dstOffset: usize,
|
||||||
|
srcArray: CUarray,
|
||||||
|
srcOffset: usize,
|
||||||
|
ByteCount: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyAtoA_v2(
|
pub fn cuMemcpyAtoA_v2(
|
||||||
dstArray: CUarray,
|
dstArray: CUarray,
|
||||||
@ -2753,12 +2916,21 @@ extern_redirect! {
|
|||||||
ByteCount: usize,
|
ByteCount: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpy2D(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpy2DUnaligned(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpy3D(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
|
pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -2783,6 +2955,14 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyHtoDAsync(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
srcHost: *const ::std::os::raw::c_void,
|
||||||
|
ByteCount: usize,
|
||||||
|
hStream: CUstream,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyHtoDAsync_v2(
|
pub fn cuMemcpyHtoDAsync_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2791,6 +2971,14 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyDtoHAsync(
|
||||||
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
|
srcDevice: CUdeviceptr,
|
||||||
|
ByteCount: usize,
|
||||||
|
hStream: CUstream,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyDtoHAsync_v2(
|
pub fn cuMemcpyDtoHAsync_v2(
|
||||||
dstHost: *mut ::std::os::raw::c_void,
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
@ -2799,6 +2987,14 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyDtoDAsync(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
srcDevice: CUdeviceptr,
|
||||||
|
ByteCount: usize,
|
||||||
|
hStream: CUstream,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyDtoDAsync_v2(
|
pub fn cuMemcpyDtoDAsync_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2807,6 +3003,15 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyHtoAAsync(
|
||||||
|
dstArray: CUarray,
|
||||||
|
dstOffset: usize,
|
||||||
|
srcHost: *const ::std::os::raw::c_void,
|
||||||
|
ByteCount: usize,
|
||||||
|
hStream: CUstream,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyHtoAAsync_v2(
|
pub fn cuMemcpyHtoAAsync_v2(
|
||||||
dstArray: CUarray,
|
dstArray: CUarray,
|
||||||
@ -2816,6 +3021,15 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpyAtoHAsync(
|
||||||
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
|
srcArray: CUarray,
|
||||||
|
srcOffset: usize,
|
||||||
|
ByteCount: usize,
|
||||||
|
hStream: CUstream,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpyAtoHAsync_v2(
|
pub fn cuMemcpyAtoHAsync_v2(
|
||||||
dstHost: *mut ::std::os::raw::c_void,
|
dstHost: *mut ::std::os::raw::c_void,
|
||||||
@ -2825,19 +3039,36 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpy2DAsync(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
|
pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemcpy3DAsync(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
|
pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
|
||||||
}
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult;
|
pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemsetD8(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
|
||||||
|
-> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
|
pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
|
||||||
-> CUresult;
|
-> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemsetD16(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
us: ::std::os::raw::c_ushort,
|
||||||
|
N: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemsetD16_v2(
|
pub fn cuMemsetD16_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2845,10 +3076,23 @@ extern_redirect! {
|
|||||||
N: usize,
|
N: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemsetD32(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
|
||||||
|
-> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
|
pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
|
||||||
-> CUresult;
|
-> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemsetD2D8(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
dstPitch: usize,
|
||||||
|
uc: ::std::os::raw::c_uchar,
|
||||||
|
Width: usize,
|
||||||
|
Height: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemsetD2D8_v2(
|
pub fn cuMemsetD2D8_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2858,6 +3102,15 @@ extern_redirect! {
|
|||||||
Height: usize,
|
Height: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemsetD2D16(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
dstPitch: usize,
|
||||||
|
us: ::std::os::raw::c_ushort,
|
||||||
|
Width: usize,
|
||||||
|
Height: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemsetD2D16_v2(
|
pub fn cuMemsetD2D16_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2867,6 +3120,15 @@ extern_redirect! {
|
|||||||
Height: usize,
|
Height: usize,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuMemsetD2D32(
|
||||||
|
dstDevice: CUdeviceptr,
|
||||||
|
dstPitch: usize,
|
||||||
|
ui: ::std::os::raw::c_uint,
|
||||||
|
Width: usize,
|
||||||
|
Height: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuMemsetD2D32_v2(
|
pub fn cuMemsetD2D32_v2(
|
||||||
dstDevice: CUdeviceptr,
|
dstDevice: CUdeviceptr,
|
||||||
@ -2930,12 +3192,24 @@ extern_redirect! {
|
|||||||
hStream: CUstream,
|
hStream: CUstream,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuArrayCreate(
|
||||||
|
pHandle: *mut CUarray,
|
||||||
|
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuArrayCreate_v2(
|
pub fn cuArrayCreate_v2(
|
||||||
pHandle: *mut CUarray,
|
pHandle: *mut CUarray,
|
||||||
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
|
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuArrayGetDescriptor(
|
||||||
|
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
|
||||||
|
hArray: CUarray,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuArrayGetDescriptor_v2(
|
pub fn cuArrayGetDescriptor_v2(
|
||||||
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
|
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
|
||||||
@ -2957,12 +3231,24 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuArrayDestroy(hArray: CUarray) -> CUresult;
|
pub fn cuArrayDestroy(hArray: CUarray) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuArray3DCreate(
|
||||||
|
pHandle: *mut CUarray,
|
||||||
|
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuArray3DCreate_v2(
|
pub fn cuArray3DCreate_v2(
|
||||||
pHandle: *mut CUarray,
|
pHandle: *mut CUarray,
|
||||||
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
|
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuArray3DGetDescriptor(
|
||||||
|
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
|
||||||
|
hArray: CUarray,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuArray3DGetDescriptor_v2(
|
pub fn cuArray3DGetDescriptor_v2(
|
||||||
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
|
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
|
||||||
@ -3169,6 +3455,9 @@ extern_redirect! {
|
|||||||
flags: ::std::os::raw::c_uint,
|
flags: ::std::os::raw::c_uint,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuStreamBeginCapture(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
|
pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -3205,6 +3494,9 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult;
|
pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuStreamDestroy(hStream: CUstream) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
|
pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -3244,6 +3536,9 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult;
|
pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuEventDestroy(hEvent: CUevent) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult;
|
pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -3666,6 +3961,15 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult;
|
pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuGraphInstantiate(
|
||||||
|
phGraphExec: *mut CUgraphExec,
|
||||||
|
hGraph: CUgraph,
|
||||||
|
phErrorNode: *mut CUgraphNode,
|
||||||
|
logBuffer: *mut ::std::os::raw::c_char,
|
||||||
|
bufferSize: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuGraphInstantiate_v2(
|
pub fn cuGraphInstantiate_v2(
|
||||||
phGraphExec: *mut CUgraphExec,
|
phGraphExec: *mut CUgraphExec,
|
||||||
@ -3823,6 +4127,14 @@ extern_redirect! {
|
|||||||
Flags: ::std::os::raw::c_uint,
|
Flags: ::std::os::raw::c_uint,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuTexRefSetAddress(
|
||||||
|
ByteOffset: *mut usize,
|
||||||
|
hTexRef: CUtexref,
|
||||||
|
dptr: CUdeviceptr,
|
||||||
|
bytes: usize,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuTexRefSetAddress_v2(
|
pub fn cuTexRefSetAddress_v2(
|
||||||
ByteOffset: *mut usize,
|
ByteOffset: *mut usize,
|
||||||
@ -3881,6 +4193,9 @@ extern_redirect! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult;
|
pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuTexRefGetAddress(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
|
pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
|
||||||
}
|
}
|
||||||
@ -4034,6 +4349,13 @@ extern_redirect! {
|
|||||||
resource: CUgraphicsResource,
|
resource: CUgraphicsResource,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuGraphicsResourceGetMappedPointer(
|
||||||
|
pDevPtr: *mut CUdeviceptr,
|
||||||
|
pSize: *mut usize,
|
||||||
|
resource: CUgraphicsResource,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuGraphicsResourceGetMappedPointer_v2(
|
pub fn cuGraphicsResourceGetMappedPointer_v2(
|
||||||
pDevPtr: *mut CUdeviceptr,
|
pDevPtr: *mut CUdeviceptr,
|
||||||
@ -4041,6 +4363,12 @@ extern_redirect! {
|
|||||||
resource: CUgraphicsResource,
|
resource: CUgraphicsResource,
|
||||||
) -> CUresult;
|
) -> CUresult;
|
||||||
}
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuGraphicsResourceSetMapFlags(
|
||||||
|
resource: CUgraphicsResource,
|
||||||
|
flags: ::std::os::raw::c_uint,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuGraphicsResourceSetMapFlags_v2(
|
pub fn cuGraphicsResourceSetMapFlags_v2(
|
||||||
resource: CUgraphicsResource,
|
resource: CUgraphicsResource,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::{BTreeMap, HashMap},
|
||||||
env,
|
env,
|
||||||
error::Error,
|
error::Error,
|
||||||
ffi::{c_void, CStr},
|
ffi::{c_void, CStr},
|
||||||
@ -24,6 +24,10 @@ use regex::Regex;
|
|||||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||||
mod os;
|
mod os;
|
||||||
|
|
||||||
|
const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
|
||||||
|
const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
|
||||||
|
const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _;
|
||||||
|
|
||||||
macro_rules! extern_redirect {
|
macro_rules! extern_redirect {
|
||||||
(pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
|
(pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
@ -68,11 +72,18 @@ mod cuda;
|
|||||||
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
|
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
|
||||||
pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None;
|
pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None;
|
||||||
pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None;
|
pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None;
|
||||||
pub static mut BUFFERS: Vec<(usize, usize)> = Vec::new();
|
static mut BUFFERS: Option<BTreeMap<usize, (usize, AllocLocation)>> = None;
|
||||||
pub static mut LAUNCH_COUNTER: usize = 0;
|
pub static mut LAUNCH_COUNTER: usize = 0;
|
||||||
pub static mut KERNEL_PATTERN: Option<Regex> = None;
|
pub static mut KERNEL_PATTERN: Option<Regex> = None;
|
||||||
pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
|
pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
enum AllocLocation {
|
||||||
|
Device,
|
||||||
|
DeviceV2,
|
||||||
|
Host,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ModuleDump {
|
pub struct ModuleDump {
|
||||||
content: Rc<String>,
|
content: Rc<String>,
|
||||||
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
kernels_args: Option<HashMap<String, Vec<usize>>>,
|
||||||
@ -88,6 +99,9 @@ pub struct KernelDump {
|
|||||||
// it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD
|
// it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD
|
||||||
pub unsafe fn init_libcuda_handle() {
|
pub unsafe fn init_libcuda_handle() {
|
||||||
if LIBCUDA_HANDLE == ptr::null_mut() {
|
if LIBCUDA_HANDLE == ptr::null_mut() {
|
||||||
|
MODULES = Some(HashMap::new());
|
||||||
|
KERNELS = Some(HashMap::new());
|
||||||
|
BUFFERS = Some(BTreeMap::new());
|
||||||
let libcuda_handle = os::load_cuda_library();
|
let libcuda_handle = os::load_cuda_library();
|
||||||
assert_ne!(libcuda_handle, ptr::null_mut());
|
assert_ne!(libcuda_handle, ptr::null_mut());
|
||||||
LIBCUDA_HANDLE = libcuda_handle;
|
LIBCUDA_HANDLE = libcuda_handle;
|
||||||
@ -162,8 +176,7 @@ unsafe fn record_module_image(module: CUmodule, image: &str) {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let modules = MODULES.get_or_insert_with(|| HashMap::new());
|
MODULES.as_mut().unwrap().insert(
|
||||||
modules.insert(
|
|
||||||
module,
|
module,
|
||||||
ModuleDump {
|
ModuleDump {
|
||||||
content: Rc::new(image.to_string()),
|
content: Rc::new(image.to_string()),
|
||||||
@ -251,8 +264,7 @@ unsafe fn cuModuleGetFunction(
|
|||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
|
KERNELS.as_mut().unwrap().insert(
|
||||||
kernel_args_map.insert(
|
|
||||||
*hfunc,
|
*hfunc,
|
||||||
KernelDump {
|
KernelDump {
|
||||||
module_content: module_dump.content.clone(),
|
module_content: module_dump.content.clone(),
|
||||||
@ -272,16 +284,60 @@ unsafe fn cuModuleGetFunction(
|
|||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub unsafe fn cuMemAlloc(
|
||||||
|
dptr: *mut CUdeviceptr,
|
||||||
|
bytesize: usize,
|
||||||
|
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
||||||
|
) -> CUresult {
|
||||||
|
cuMemAlloc_impl(false, dptr, bytesize, cont)
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub unsafe fn cuMemAlloc_v2(
|
pub unsafe fn cuMemAlloc_v2(
|
||||||
dptr: *mut CUdeviceptr,
|
dptr: *mut CUdeviceptr,
|
||||||
bytesize: usize,
|
bytesize: usize,
|
||||||
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
||||||
|
) -> CUresult {
|
||||||
|
cuMemAlloc_impl(true, dptr, bytesize, cont)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub unsafe fn cuMemAlloc_impl(
|
||||||
|
is_v2: bool,
|
||||||
|
dptr: *mut CUdeviceptr,
|
||||||
|
bytesize: usize,
|
||||||
|
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
|
||||||
) -> CUresult {
|
) -> CUresult {
|
||||||
let result = cont(dptr, bytesize);
|
let result = cont(dptr, bytesize);
|
||||||
assert_eq!(result, CUresult::CUDA_SUCCESS);
|
assert_eq!(result, CUresult::CUDA_SUCCESS);
|
||||||
let start = (*dptr).0 as usize;
|
let start = (*dptr).0 as usize;
|
||||||
BUFFERS.push((start, bytesize));
|
let location = if is_v2 {
|
||||||
|
AllocLocation::DeviceV2
|
||||||
|
} else {
|
||||||
|
AllocLocation::Device
|
||||||
|
};
|
||||||
|
BUFFERS
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.insert(start, (bytesize, location));
|
||||||
|
CUresult::CUDA_SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub unsafe fn cuMemHostAlloc(
|
||||||
|
pp: *mut *mut c_void,
|
||||||
|
bytesize: usize,
|
||||||
|
flags: c_uint,
|
||||||
|
cont: impl FnOnce(*mut *mut c_void, usize, c_uint) -> CUresult,
|
||||||
|
) -> CUresult {
|
||||||
|
let result = cont(pp, bytesize, flags);
|
||||||
|
assert_eq!(result, CUresult::CUDA_SUCCESS);
|
||||||
|
let start = (*pp) as usize;
|
||||||
|
BUFFERS
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.insert(start, (bytesize, AllocLocation::Host));
|
||||||
CUresult::CUDA_SUCCESS
|
CUresult::CUDA_SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,6 +386,7 @@ pub unsafe fn cuLaunchKernel(
|
|||||||
blockDimZ,
|
blockDimZ,
|
||||||
sharedMemBytes,
|
sharedMemBytes,
|
||||||
kernelParams,
|
kernelParams,
|
||||||
|
extra,
|
||||||
dump_env,
|
dump_env,
|
||||||
)
|
)
|
||||||
.unwrap_or_else(|err| os_log!("{}", err));
|
.unwrap_or_else(|err| os_log!("{}", err));
|
||||||
@ -353,6 +410,7 @@ pub unsafe fn cuLaunchKernel(
|
|||||||
if let Some((_, kernel_dump)) = &dump_env {
|
if let Some((_, kernel_dump)) = &dump_env {
|
||||||
dump_arguments(
|
dump_arguments(
|
||||||
kernelParams,
|
kernelParams,
|
||||||
|
extra,
|
||||||
"post",
|
"post",
|
||||||
&kernel_dump.name,
|
&kernel_dump.name,
|
||||||
LAUNCH_COUNTER,
|
LAUNCH_COUNTER,
|
||||||
@ -423,6 +481,7 @@ unsafe fn dump_pre_data(
|
|||||||
blockDimZ: ::std::os::raw::c_uint,
|
blockDimZ: ::std::os::raw::c_uint,
|
||||||
sharedMemBytes: ::std::os::raw::c_uint,
|
sharedMemBytes: ::std::os::raw::c_uint,
|
||||||
kernelParams: *mut *mut ::std::os::raw::c_void,
|
kernelParams: *mut *mut ::std::os::raw::c_void,
|
||||||
|
extra: *mut *mut ::std::os::raw::c_void,
|
||||||
(dump_dir, kernel_dump): &(PathBuf, &'static KernelDump),
|
(dump_dir, kernel_dump): &(PathBuf, &'static KernelDump),
|
||||||
) -> Result<(), Box<dyn Error>> {
|
) -> Result<(), Box<dyn Error>> {
|
||||||
dump_launch_arguments(
|
dump_launch_arguments(
|
||||||
@ -441,6 +500,7 @@ unsafe fn dump_pre_data(
|
|||||||
module_file.write_all(kernel_dump.module_content.as_bytes())?;
|
module_file.write_all(kernel_dump.module_content.as_bytes())?;
|
||||||
dump_arguments(
|
dump_arguments(
|
||||||
kernelParams,
|
kernelParams,
|
||||||
|
extra,
|
||||||
"pre",
|
"pre",
|
||||||
&kernel_dump.name,
|
&kernel_dump.name,
|
||||||
LAUNCH_COUNTER,
|
LAUNCH_COUNTER,
|
||||||
@ -449,8 +509,9 @@ unsafe fn dump_pre_data(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn dump_arguments(
|
fn dump_arguments(
|
||||||
kernel_params: *mut *mut ::std::os::raw::c_void,
|
kernel_params: *mut *mut ::std::os::raw::c_void,
|
||||||
|
extra: *mut *mut ::std::os::raw::c_void,
|
||||||
prefix: &str,
|
prefix: &str,
|
||||||
kernel_name: &str,
|
kernel_name: &str,
|
||||||
counter: usize,
|
counter: usize,
|
||||||
@ -467,33 +528,115 @@ unsafe fn dump_arguments(
|
|||||||
fs::remove_dir_all(&dump_dir)?;
|
fs::remove_dir_all(&dump_dir)?;
|
||||||
}
|
}
|
||||||
fs::create_dir_all(&dump_dir)?;
|
fs::create_dir_all(&dump_dir)?;
|
||||||
for (i, arg_len) in args.iter().enumerate() {
|
if kernel_params != ptr::null_mut() {
|
||||||
let dev_ptr = *(*kernel_params.add(i) as *mut usize);
|
for (i, arg_len) in args.iter().enumerate() {
|
||||||
match BUFFERS.iter().find(|(start, _)| *start == dev_ptr as usize) {
|
unsafe { dump_argument_to_file(&dump_dir, i, *arg_len, *kernel_params.add(i))? };
|
||||||
Some((start, len)) => {
|
}
|
||||||
let mut output = vec![0u8; *len];
|
} else {
|
||||||
let error =
|
let mut offset = 0;
|
||||||
cuda::cuMemcpyDtoH_v2(output.as_mut_ptr() as *mut _, CUdeviceptr(*start), *len);
|
let mut buffer_ptr = None;
|
||||||
assert_eq!(error, CUresult::CUDA_SUCCESS);
|
let mut buffer_size = None;
|
||||||
let mut path = dump_dir.clone();
|
loop {
|
||||||
path.push(format!("arg_{:03}.buffer", i));
|
match unsafe { *extra.add(offset) } {
|
||||||
let mut file = File::create(path)?;
|
CU_LAUNCH_PARAM_END => break,
|
||||||
file.write_all(&mut output)?;
|
CU_LAUNCH_PARAM_BUFFER_POINTER => {
|
||||||
|
buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 });
|
||||||
|
}
|
||||||
|
CU_LAUNCH_PARAM_BUFFER_SIZE => {
|
||||||
|
buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) });
|
||||||
|
}
|
||||||
|
_ => return Err("Malformed `extra` parameter to kernel launch")?,
|
||||||
}
|
}
|
||||||
None => {
|
offset += 2;
|
||||||
let mut path = dump_dir.clone();
|
}
|
||||||
path.push(format!("arg_{:03}", i));
|
match (buffer_size, buffer_ptr) {
|
||||||
let mut file = File::create(path)?;
|
(Some(buffer_size), Some(buffer_ptr)) => {
|
||||||
file.write_all(slice::from_raw_parts(
|
let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
|
||||||
*kernel_params.add(i) as *mut u8,
|
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
|
||||||
*arg_len,
|
});
|
||||||
))?;
|
if buffer_size != sum_of_kernel_argument_sizes {
|
||||||
|
return Err("Malformed `extra` parameter to kernel launch")?;
|
||||||
|
}
|
||||||
|
let mut offset = 0;
|
||||||
|
for (i, arg_size) in args.iter().enumerate() {
|
||||||
|
let buffer_offset = round_up_to_multiple(offset, *arg_size);
|
||||||
|
unsafe {
|
||||||
|
dump_argument_to_file(
|
||||||
|
&dump_dir,
|
||||||
|
i,
|
||||||
|
*arg_size,
|
||||||
|
buffer_ptr.add(buffer_offset) as *const _,
|
||||||
|
)?
|
||||||
|
};
|
||||||
|
offset = buffer_offset + *arg_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
_ => return Err("Malformed `extra` parameter to kernel launch")?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rounds `x` up to the nearest multiple of `multiple`.
///
/// A `multiple` of zero imposes no alignment and returns `x` unchanged
/// instead of dividing by zero. The computation works on the remainder, so
/// it does not overflow unless the rounded result itself is unrepresentable.
fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
    if multiple == 0 {
        return x;
    }
    match x % multiple {
        0 => x,
        remainder => x + (multiple - remainder),
    }
}
|
||||||
|
|
||||||
|
unsafe fn dump_argument_to_file(
|
||||||
|
dump_dir: &PathBuf,
|
||||||
|
i: usize,
|
||||||
|
arg_len: usize,
|
||||||
|
ptr: *const c_void,
|
||||||
|
) -> Result<(), Box<dyn Error>> {
|
||||||
|
// Don't check if arg_len == sizeof(void*), there are libraries
|
||||||
|
// which for some reason pass 32 pointers (4 bytes) in 8 byte arguments
|
||||||
|
match get_buffer_length(*(ptr as *mut usize)) {
|
||||||
|
Some((start, len, location)) => {
|
||||||
|
let mut output = vec![0u8; len];
|
||||||
|
let memcpy_fn = match location {
|
||||||
|
AllocLocation::Device => |src, dst: usize, len| {
|
||||||
|
let error = cuda::cuMemcpyDtoH(dst as *mut _, CUdeviceptr(src), len);
|
||||||
|
assert_eq!(error, CUresult::CUDA_SUCCESS);
|
||||||
|
},
|
||||||
|
AllocLocation::DeviceV2 => |src, dst: usize, len| {
|
||||||
|
let error = cuda::cuMemcpyDtoH_v2(dst as *mut _, CUdeviceptr(src), len);
|
||||||
|
assert_eq!(error, CUresult::CUDA_SUCCESS);
|
||||||
|
},
|
||||||
|
AllocLocation::Host => |src, dst: usize, len| {
|
||||||
|
ptr::copy_nonoverlapping(src as *mut u8, dst as *mut u8, len);
|
||||||
|
},
|
||||||
|
};
|
||||||
|
memcpy_fn(start, output.as_mut_ptr() as usize, len);
|
||||||
|
let mut path = dump_dir.clone();
|
||||||
|
path.push(format!("arg_{:03}.buffer", i));
|
||||||
|
let mut file = File::create(path)?;
|
||||||
|
file.write_all(&mut output)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let mut path = dump_dir.clone();
|
||||||
|
path.push(format!("arg_{:03}", i));
|
||||||
|
let mut file = File::create(path)?;
|
||||||
|
file.write_all(slice::from_raw_parts(ptr as *mut u8, arg_len))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn get_buffer_length(ptr: usize) -> Option<(usize, usize, AllocLocation)> {
|
||||||
|
BUFFERS
|
||||||
|
.as_mut()
|
||||||
|
.unwrap()
|
||||||
|
.range(..=ptr)
|
||||||
|
.next_back()
|
||||||
|
.and_then(|(start, (len, loc))| {
|
||||||
|
let end = *start + *len;
|
||||||
|
if ptr < end {
|
||||||
|
Some((ptr, end - ptr, *loc))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> {
|
fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> {
|
||||||
let dir = env::var("ZLUDA_DUMP_DIR")?;
|
let dir = env::var("ZLUDA_DUMP_DIR")?;
|
||||||
let mut main_dir = PathBuf::from(dir);
|
let mut main_dir = PathBuf::from(dir);
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use std::os::windows::ffi::OsStrExt;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
use std::{env, ops::Deref};
|
use std::{env, ops::Deref};
|
||||||
@ -33,7 +34,7 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
|
|||||||
let injector_dir = injector_path.parent().unwrap();
|
let injector_dir = injector_path.parent().unwrap();
|
||||||
let redirect_path = create_redirect_path(injector_dir);
|
let redirect_path = create_redirect_path(injector_dir);
|
||||||
let (mut inject_nvcuda_path, mut inject_nvml_path, cmd) =
|
let (mut inject_nvcuda_path, mut inject_nvml_path, cmd) =
|
||||||
create_inject_path(&args[1..], injector_dir);
|
create_inject_path(&args[1..], injector_dir)?;
|
||||||
let mut cmd_line = construct_command_line(cmd);
|
let mut cmd_line = construct_command_line(cmd);
|
||||||
let mut startup_info = unsafe { mem::zeroed::<detours_sys::_STARTUPINFOW>() };
|
let mut startup_info = unsafe { mem::zeroed::<detours_sys::_STARTUPINFOW>() };
|
||||||
let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() };
|
let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() };
|
||||||
@ -110,7 +111,7 @@ fn print_help_and_exit() -> ! {
|
|||||||
{0} -- <EXE> [ARGS]...
|
{0} -- <EXE> [ARGS]...
|
||||||
{0} <DLL> -- <EXE> [ARGS]...
|
{0} <DLL> -- <EXE> [ARGS]...
|
||||||
ARGS:
|
ARGS:
|
||||||
<DLL> DLL to ne injected instead of system nvcuda.dll, if not provided
|
<DLL> DLL to be injected instead of system nvcuda.dll, if not provided
|
||||||
will use nvcuda.dll from the directory where {0} is located
|
will use nvcuda.dll from the directory where {0} is located
|
||||||
<EXE> Path to the executable to be injected with <DLL>
|
<EXE> Path to the executable to be injected with <DLL>
|
||||||
<ARGS>... Arguments that will be passed to <EXE>
|
<ARGS>... Arguments that will be passed to <EXE>
|
||||||
@ -187,7 +188,7 @@ fn create_redirect_path(injector_dir: &Path) -> Vec<u8> {
|
|||||||
fn create_inject_path<'a>(
|
fn create_inject_path<'a>(
|
||||||
args: &'a [String],
|
args: &'a [String],
|
||||||
injector_dir: &Path,
|
injector_dir: &Path,
|
||||||
) -> (Vec<u16>, Vec<u16>, &'a [String]) {
|
) -> std::io::Result<(Vec<u16>, Vec<u16>, &'a [String])> {
|
||||||
let injector_dir = injector_dir.to_path_buf();
|
let injector_dir = injector_dir.to_path_buf();
|
||||||
let (nvcuda_path, unparsed_args) = if args.get(0).map(Deref::deref) == Some("--") {
|
let (nvcuda_path, unparsed_args) = if args.get(0).map(Deref::deref) == Some("--") {
|
||||||
(
|
(
|
||||||
@ -195,14 +196,13 @@ fn create_inject_path<'a>(
|
|||||||
&args[1..],
|
&args[1..],
|
||||||
)
|
)
|
||||||
} else if args.get(1).map(Deref::deref) == Some("--") {
|
} else if args.get(1).map(Deref::deref) == Some("--") {
|
||||||
let mut dll_path = args[0].encode_utf16().collect::<Vec<_>>();
|
let dll_path = make_absolute_and_encode(&args[0])?;
|
||||||
dll_path.push(0);
|
|
||||||
(dll_path, &args[2..])
|
(dll_path, &args[2..])
|
||||||
} else {
|
} else {
|
||||||
print_help_and_exit()
|
print_help_and_exit()
|
||||||
};
|
};
|
||||||
let nvml_path = encode_file_in_directory_raw(injector_dir, ZLUDA_ML_DLL);
|
let nvml_path = encode_file_in_directory_raw(injector_dir, ZLUDA_ML_DLL);
|
||||||
(nvcuda_path, nvml_path, unparsed_args)
|
Ok((nvcuda_path, nvml_path, unparsed_args))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16> {
|
fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16> {
|
||||||
@ -215,3 +215,16 @@ fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16
|
|||||||
result.push(0);
|
result.push(0);
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn make_absolute_and_encode(maybe_path: &str) -> std::io::Result<Vec<u16>> {
|
||||||
|
let path = Path::new(maybe_path);
|
||||||
|
let mut encoded_path = if path.is_relative() {
|
||||||
|
let mut current_dir = env::current_dir()?;
|
||||||
|
current_dir.push(path);
|
||||||
|
current_dir.as_os_str().encode_wide().collect::<Vec<_>>()
|
||||||
|
} else {
|
||||||
|
maybe_path.encode_utf16().collect::<Vec<_>>()
|
||||||
|
};
|
||||||
|
encoded_path.push(0);
|
||||||
|
Ok(encoded_path)
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user