Make zluda_dump more robust

This commit is contained in:
Andrzej Janik
2021-04-12 00:18:27 +02:00
parent a39dda67d1
commit 96f95d59ce
3 changed files with 519 additions and 35 deletions

View File

@ -2341,6 +2341,9 @@ extern_redirect! {
dev: CUdevice,
) -> CUresult;
}
extern_redirect! {
pub fn cuDeviceTotalMem(bytes: *mut usize, dev: CUdevice) -> CUresult;
}
extern_redirect! {
pub fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: CUdevice) -> CUresult;
}
@ -2405,6 +2408,13 @@ extern_redirect! {
extern_redirect! {
pub fn cuDevicePrimaryCtxReset_v2(dev: CUdevice) -> CUresult;
}
extern_redirect! {
pub fn cuCtxCreate(
pctx: *mut CUcontext,
flags: ::std::os::raw::c_uint,
dev: CUdevice,
) -> CUresult;
}
extern_redirect! {
pub fn cuCtxCreate_v2(
pctx: *mut CUcontext,
@ -2412,12 +2422,21 @@ extern_redirect! {
dev: CUdevice,
) -> CUresult;
}
extern_redirect! {
pub fn cuCtxDestroy(ctx: CUcontext) -> CUresult;
}
extern_redirect! {
pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
}
extern_redirect! {
pub fn cuCtxPushCurrent(ctx: CUcontext) -> CUresult;
}
extern_redirect! {
pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
}
extern_redirect! {
pub fn cuCtxPopCurrent(pctx: *mut CUcontext) -> CUresult;
}
extern_redirect! {
pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
}
@ -2509,6 +2528,14 @@ extern_redirect_with! {
) -> CUresult;
super::cuModuleGetFunction;
}
extern_redirect! {
pub fn cuModuleGetGlobal(
dptr: *mut CUdeviceptr,
bytes: *mut usize,
hmod: CUmodule,
name: *const ::std::os::raw::c_char,
) -> CUresult;
}
extern_redirect! {
pub fn cuModuleGetGlobal_v2(
dptr: *mut CUdeviceptr,
@ -2531,6 +2558,14 @@ extern_redirect! {
name: *const ::std::os::raw::c_char,
) -> CUresult;
}
extern_redirect! {
pub fn cuLinkCreate(
numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
stateOut: *mut CUlinkState,
) -> CUresult;
}
extern_redirect! {
pub fn cuLinkCreate_v2(
numOptions: ::std::os::raw::c_uint,
@ -2539,6 +2574,18 @@ extern_redirect! {
stateOut: *mut CUlinkState,
) -> CUresult;
}
extern_redirect! {
pub fn cuLinkAddData(
state: CUlinkState,
type_: CUjitInputType,
data: *mut ::std::os::raw::c_void,
size: usize,
name: *const ::std::os::raw::c_char,
numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
}
extern_redirect! {
pub fn cuLinkAddData_v2(
state: CUlinkState,
@ -2551,6 +2598,16 @@ extern_redirect! {
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
}
extern_redirect! {
pub fn cuLinkAddFile(
state: CUlinkState,
type_: CUjitInputType,
path: *const ::std::os::raw::c_char,
numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult;
}
extern_redirect! {
pub fn cuLinkAddFile_v2(
state: CUlinkState,
@ -2571,13 +2628,29 @@ extern_redirect! {
extern_redirect! {
pub fn cuLinkDestroy(state: CUlinkState) -> CUresult;
}
extern_redirect! {
pub fn cuMemGetInfo(free: *mut usize, total: *mut usize) -> CUresult;
}
extern_redirect! {
pub fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult;
}
extern_redirect_with! {
pub fn cuMemAlloc(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
super::cuMemAlloc;
}
extern_redirect_with! {
pub fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult;
super::cuMemAlloc_v2;
}
extern_redirect! {
pub fn cuMemAllocPitch(
dptr: *mut CUdeviceptr,
pPitch: *mut usize,
WidthInBytes: usize,
Height: usize,
ElementSizeBytes: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemAllocPitch_v2(
dptr: *mut CUdeviceptr,
@ -2587,9 +2660,19 @@ extern_redirect! {
ElementSizeBytes: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemFree(dptr: CUdeviceptr) -> CUresult;
}
extern_redirect! {
pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
}
extern_redirect! {
pub fn cuMemGetAddressRange(
pbase: *mut CUdeviceptr,
psize: *mut usize,
dptr: CUdeviceptr,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemGetAddressRange_v2(
pbase: *mut CUdeviceptr,
@ -2597,18 +2680,29 @@ extern_redirect! {
dptr: CUdeviceptr,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemAllocHost(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
}
extern_redirect! {
pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
}
extern_redirect! {
pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
}
extern_redirect! {
extern_redirect_with! {
pub fn cuMemHostAlloc(
pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize,
Flags: ::std::os::raw::c_uint,
) -> CUresult;
super::cuMemHostAlloc;
}
extern_redirect! {
pub fn cuMemHostGetDevicePointer(
pdptr: *mut CUdeviceptr,
p: *mut ::std::os::raw::c_void,
Flags: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemHostGetDevicePointer_v2(
@ -2669,6 +2763,13 @@ extern_redirect! {
extern_redirect! {
pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult;
}
extern_redirect! {
pub fn cuMemHostRegister(
p: *mut ::std::os::raw::c_void,
bytesize: usize,
Flags: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemHostRegister_v2(
p: *mut ::std::os::raw::c_void,
@ -2691,6 +2792,13 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoD(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoD_v2(
dstDevice: CUdeviceptr,
@ -2698,6 +2806,13 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoH(
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoH_v2(
dstHost: *mut ::std::os::raw::c_void,
@ -2705,6 +2820,13 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoD(
dstDevice: CUdeviceptr,
srcDevice: CUdeviceptr,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoD_v2(
dstDevice: CUdeviceptr,
@ -2712,6 +2834,14 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoA(
dstArray: CUarray,
dstOffset: usize,
srcDevice: CUdeviceptr,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoA_v2(
dstArray: CUarray,
@ -2720,6 +2850,14 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoD(
dstDevice: CUdeviceptr,
srcArray: CUarray,
srcOffset: usize,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoD_v2(
dstDevice: CUdeviceptr,
@ -2728,6 +2866,14 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoA(
dstArray: CUarray,
dstOffset: usize,
srcHost: *const ::std::os::raw::c_void,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoA_v2(
dstArray: CUarray,
@ -2736,6 +2882,14 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoH(
dstHost: *mut ::std::os::raw::c_void,
srcArray: CUarray,
srcOffset: usize,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoH_v2(
dstHost: *mut ::std::os::raw::c_void,
@ -2744,6 +2898,15 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoA(
dstArray: CUarray,
dstOffset: usize,
srcArray: CUarray,
srcOffset: usize,
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoA_v2(
dstArray: CUarray,
@ -2753,12 +2916,21 @@ extern_redirect! {
ByteCount: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy2D(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy2DUnaligned(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy3D(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult;
}
@ -2783,6 +2955,14 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoDAsync(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
ByteCount: usize,
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoDAsync_v2(
dstDevice: CUdeviceptr,
@ -2791,6 +2971,14 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoHAsync(
dstHost: *mut ::std::os::raw::c_void,
srcDevice: CUdeviceptr,
ByteCount: usize,
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoHAsync_v2(
dstHost: *mut ::std::os::raw::c_void,
@ -2799,6 +2987,14 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoDAsync(
dstDevice: CUdeviceptr,
srcDevice: CUdeviceptr,
ByteCount: usize,
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyDtoDAsync_v2(
dstDevice: CUdeviceptr,
@ -2807,6 +3003,15 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoAAsync(
dstArray: CUarray,
dstOffset: usize,
srcHost: *const ::std::os::raw::c_void,
ByteCount: usize,
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyHtoAAsync_v2(
dstArray: CUarray,
@ -2816,6 +3021,15 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoHAsync(
dstHost: *mut ::std::os::raw::c_void,
srcArray: CUarray,
srcOffset: usize,
ByteCount: usize,
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpyAtoHAsync_v2(
dstHost: *mut ::std::os::raw::c_void,
@ -2825,19 +3039,36 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy2DAsync(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy3DAsync(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD8(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
-> CUresult;
}
extern_redirect! {
pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::std::os::raw::c_uchar, N: usize)
-> CUresult;
}
extern_redirect! {
pub fn cuMemsetD16(
dstDevice: CUdeviceptr,
us: ::std::os::raw::c_ushort,
N: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD16_v2(
dstDevice: CUdeviceptr,
@ -2845,10 +3076,23 @@ extern_redirect! {
N: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD32(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
-> CUresult;
}
extern_redirect! {
pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::std::os::raw::c_uint, N: usize)
-> CUresult;
}
extern_redirect! {
pub fn cuMemsetD2D8(
dstDevice: CUdeviceptr,
dstPitch: usize,
uc: ::std::os::raw::c_uchar,
Width: usize,
Height: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD2D8_v2(
dstDevice: CUdeviceptr,
@ -2858,6 +3102,15 @@ extern_redirect! {
Height: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD2D16(
dstDevice: CUdeviceptr,
dstPitch: usize,
us: ::std::os::raw::c_ushort,
Width: usize,
Height: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD2D16_v2(
dstDevice: CUdeviceptr,
@ -2867,6 +3120,15 @@ extern_redirect! {
Height: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD2D32(
dstDevice: CUdeviceptr,
dstPitch: usize,
ui: ::std::os::raw::c_uint,
Width: usize,
Height: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuMemsetD2D32_v2(
dstDevice: CUdeviceptr,
@ -2930,12 +3192,24 @@ extern_redirect! {
hStream: CUstream,
) -> CUresult;
}
extern_redirect! {
pub fn cuArrayCreate(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
) -> CUresult;
}
extern_redirect! {
pub fn cuArrayCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR,
) -> CUresult;
}
extern_redirect! {
pub fn cuArrayGetDescriptor(
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
hArray: CUarray,
) -> CUresult;
}
extern_redirect! {
pub fn cuArrayGetDescriptor_v2(
pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR,
@ -2957,12 +3231,24 @@ extern_redirect! {
extern_redirect! {
pub fn cuArrayDestroy(hArray: CUarray) -> CUresult;
}
extern_redirect! {
pub fn cuArray3DCreate(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
) -> CUresult;
}
extern_redirect! {
pub fn cuArray3DCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
) -> CUresult;
}
extern_redirect! {
pub fn cuArray3DGetDescriptor(
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
hArray: CUarray,
) -> CUresult;
}
extern_redirect! {
pub fn cuArray3DGetDescriptor_v2(
pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR,
@ -3169,6 +3455,9 @@ extern_redirect! {
flags: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuStreamBeginCapture(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
}
extern_redirect! {
pub fn cuStreamBeginCapture_v2(hStream: CUstream, mode: CUstreamCaptureMode) -> CUresult;
}
@ -3205,6 +3494,9 @@ extern_redirect! {
extern_redirect! {
pub fn cuStreamSynchronize(hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuStreamDestroy(hStream: CUstream) -> CUresult;
}
extern_redirect! {
pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
}
@ -3244,6 +3536,9 @@ extern_redirect! {
extern_redirect! {
pub fn cuEventSynchronize(hEvent: CUevent) -> CUresult;
}
extern_redirect! {
pub fn cuEventDestroy(hEvent: CUevent) -> CUresult;
}
extern_redirect! {
pub fn cuEventDestroy_v2(hEvent: CUevent) -> CUresult;
}
@ -3666,6 +3961,15 @@ extern_redirect! {
extern_redirect! {
pub fn cuGraphDestroyNode(hNode: CUgraphNode) -> CUresult;
}
extern_redirect! {
pub fn cuGraphInstantiate(
phGraphExec: *mut CUgraphExec,
hGraph: CUgraph,
phErrorNode: *mut CUgraphNode,
logBuffer: *mut ::std::os::raw::c_char,
bufferSize: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuGraphInstantiate_v2(
phGraphExec: *mut CUgraphExec,
@ -3823,6 +4127,14 @@ extern_redirect! {
Flags: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuTexRefSetAddress(
ByteOffset: *mut usize,
hTexRef: CUtexref,
dptr: CUdeviceptr,
bytes: usize,
) -> CUresult;
}
extern_redirect! {
pub fn cuTexRefSetAddress_v2(
ByteOffset: *mut usize,
@ -3881,6 +4193,9 @@ extern_redirect! {
extern_redirect! {
pub fn cuTexRefSetFlags(hTexRef: CUtexref, Flags: ::std::os::raw::c_uint) -> CUresult;
}
extern_redirect! {
pub fn cuTexRefGetAddress(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
}
extern_redirect! {
pub fn cuTexRefGetAddress_v2(pdptr: *mut CUdeviceptr, hTexRef: CUtexref) -> CUresult;
}
@ -4034,6 +4349,13 @@ extern_redirect! {
resource: CUgraphicsResource,
) -> CUresult;
}
extern_redirect! {
pub fn cuGraphicsResourceGetMappedPointer(
pDevPtr: *mut CUdeviceptr,
pSize: *mut usize,
resource: CUgraphicsResource,
) -> CUresult;
}
extern_redirect! {
pub fn cuGraphicsResourceGetMappedPointer_v2(
pDevPtr: *mut CUdeviceptr,
@ -4041,6 +4363,12 @@ extern_redirect! {
resource: CUgraphicsResource,
) -> CUresult;
}
extern_redirect! {
pub fn cuGraphicsResourceSetMapFlags(
resource: CUgraphicsResource,
flags: ::std::os::raw::c_uint,
) -> CUresult;
}
extern_redirect! {
pub fn cuGraphicsResourceSetMapFlags_v2(
resource: CUgraphicsResource,

View File

@ -1,5 +1,5 @@
use std::{
collections::HashMap,
collections::{BTreeMap, HashMap},
env,
error::Error,
ffi::{c_void, CStr},
@ -24,6 +24,10 @@ use regex::Regex;
#[cfg_attr(not(windows), path = "os_unix.rs")]
mod os;
const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
const CU_LAUNCH_PARAM_BUFFER_SIZE: *mut c_void = 2 as *mut _;
macro_rules! extern_redirect {
(pub fn $fn_name:ident ( $($arg_id:ident: $arg_type:ty),* $(,)? ) -> $ret_type:ty ;) => {
#[no_mangle]
@ -68,11 +72,18 @@ mod cuda;
pub static mut LIBCUDA_HANDLE: *mut c_void = ptr::null_mut();
pub static mut MODULES: Option<HashMap<CUmodule, ModuleDump>> = None;
pub static mut KERNELS: Option<HashMap<CUfunction, KernelDump>> = None;
pub static mut BUFFERS: Vec<(usize, usize)> = Vec::new();
static mut BUFFERS: Option<BTreeMap<usize, (usize, AllocLocation)>> = None;
pub static mut LAUNCH_COUNTER: usize = 0;
pub static mut KERNEL_PATTERN: Option<Regex> = None;
pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
/// Tag stored next to each tracked buffer in `BUFFERS`, recording which hooked
/// allocation call produced it. The dump code uses the tag to pick how the
/// buffer contents are copied out.
#[derive(Clone, Copy)]
enum AllocLocation {
/// Recorded by the `cuMemAlloc` hook; contents are read back with `cuMemcpyDtoH`.
Device,
/// Recorded by the `cuMemAlloc_v2` hook; contents are read back with `cuMemcpyDtoH_v2`.
DeviceV2,
/// Recorded by the `cuMemHostAlloc` hook; contents are copied with a plain memory copy.
Host,
}
pub struct ModuleDump {
content: Rc<String>,
kernels_args: Option<HashMap<String, Vec<usize>>>,
@ -88,6 +99,9 @@ pub struct KernelDump {
// it's because CUDA Runtime API does dlopen to open libcuda.so, which ignores LD_PRELOAD
pub unsafe fn init_libcuda_handle() {
if LIBCUDA_HANDLE == ptr::null_mut() {
MODULES = Some(HashMap::new());
KERNELS = Some(HashMap::new());
BUFFERS = Some(BTreeMap::new());
let libcuda_handle = os::load_cuda_library();
assert_ne!(libcuda_handle, ptr::null_mut());
LIBCUDA_HANDLE = libcuda_handle;
@ -162,8 +176,7 @@ unsafe fn record_module_image(module: CUmodule, image: &str) {
None
}
};
let modules = MODULES.get_or_insert_with(|| HashMap::new());
modules.insert(
MODULES.as_mut().unwrap().insert(
module,
ModuleDump {
content: Rc::new(image.to_string()),
@ -251,8 +264,7 @@ unsafe fn cuModuleGetFunction(
} else {
None
};
let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
kernel_args_map.insert(
KERNELS.as_mut().unwrap().insert(
*hfunc,
KernelDump {
module_content: module_dump.content.clone(),
@ -272,16 +284,60 @@ unsafe fn cuModuleGetFunction(
CUresult::CUDA_SUCCESS
}
/// Hook body for `cuMemAlloc`: forwards to [`cuMemAlloc_impl`] with `is_v2 == false`.
///
/// `cont` is called by the implementation with `dptr` and `bytesize`
/// (presumably it invokes the real driver entry point - confirm against
/// the `extern_redirect_with!` macro).
#[allow(non_snake_case)]
pub unsafe fn cuMemAlloc(
dptr: *mut CUdeviceptr,
bytesize: usize,
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
) -> CUresult {
cuMemAlloc_impl(false, dptr, bytesize, cont)
}
/// Hook body for `cuMemAlloc_v2`: forwards to [`cuMemAlloc_impl`] with `is_v2 == true`.
///
/// `cont` is called by the implementation with `dptr` and `bytesize`
/// (presumably it invokes the real driver entry point - confirm against
/// the `extern_redirect_with!` macro).
#[allow(non_snake_case)]
pub unsafe fn cuMemAlloc_v2(
dptr: *mut CUdeviceptr,
bytesize: usize,
cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
) -> CUresult {
cuMemAlloc_impl(true, dptr, bytesize, cont)
}
/// Shared implementation behind the `cuMemAlloc` and `cuMemAlloc_v2` hooks.
///
/// Runs `cont(dptr, bytesize)`. When it reports `CUDA_SUCCESS`, the new
/// allocation is recorded in `BUFFERS`, keyed by its start address, together
/// with its size and an [`AllocLocation`] chosen from `is_v2`.
///
/// Any other result from `cont` is returned unchanged and nothing is recorded;
/// `*dptr` is not read in that case.
///
/// # Safety
///
/// `dptr` must be readable once `cont` has returned `CUDA_SUCCESS`.
///
/// # Panics
///
/// Panics if `BUFFERS` has not been initialised (it is `None`).
#[allow(non_snake_case)]
pub unsafe fn cuMemAlloc_impl(
    is_v2: bool,
    dptr: *mut CUdeviceptr,
    bytesize: usize,
    cont: impl FnOnce(*mut CUdeviceptr, usize) -> CUresult,
) -> CUresult {
    let result = cont(dptr, bytesize);
    // Hand a failed allocation back to the application instead of panicking
    // inside the hook; the application is the one that can react to it.
    if result != CUresult::CUDA_SUCCESS {
        return result;
    }
    let location = if is_v2 {
        AllocLocation::DeviceV2
    } else {
        AllocLocation::Device
    };
    let start = (*dptr).0 as usize;
    BUFFERS
        .as_mut()
        .unwrap()
        .insert(start, (bytesize, location));
    CUresult::CUDA_SUCCESS
}
/// Hook body for `cuMemHostAlloc`.
///
/// Runs `cont(pp, bytesize, flags)`. When it reports `CUDA_SUCCESS`, the
/// allocation is recorded in `BUFFERS`, keyed by the address written to `*pp`,
/// with its size and [`AllocLocation::Host`].
///
/// Any other result from `cont` is returned unchanged and nothing is recorded;
/// `*pp` is not read in that case.
///
/// # Safety
///
/// `pp` must be readable once `cont` has returned `CUDA_SUCCESS`.
///
/// # Panics
///
/// Panics if `BUFFERS` has not been initialised (it is `None`).
#[allow(non_snake_case)]
pub unsafe fn cuMemHostAlloc(
    pp: *mut *mut c_void,
    bytesize: usize,
    flags: c_uint,
    cont: impl FnOnce(*mut *mut c_void, usize, c_uint) -> CUresult,
) -> CUresult {
    let result = cont(pp, bytesize, flags);
    // Hand a failed allocation back to the application instead of panicking
    // inside the hook; the application is the one that can react to it.
    if result != CUresult::CUDA_SUCCESS {
        return result;
    }
    let start = (*pp) as usize;
    BUFFERS
        .as_mut()
        .unwrap()
        .insert(start, (bytesize, AllocLocation::Host));
    CUresult::CUDA_SUCCESS
}
@ -330,6 +386,7 @@ pub unsafe fn cuLaunchKernel(
blockDimZ,
sharedMemBytes,
kernelParams,
extra,
dump_env,
)
.unwrap_or_else(|err| os_log!("{}", err));
@ -353,6 +410,7 @@ pub unsafe fn cuLaunchKernel(
if let Some((_, kernel_dump)) = &dump_env {
dump_arguments(
kernelParams,
extra,
"post",
&kernel_dump.name,
LAUNCH_COUNTER,
@ -423,6 +481,7 @@ unsafe fn dump_pre_data(
blockDimZ: ::std::os::raw::c_uint,
sharedMemBytes: ::std::os::raw::c_uint,
kernelParams: *mut *mut ::std::os::raw::c_void,
extra: *mut *mut ::std::os::raw::c_void,
(dump_dir, kernel_dump): &(PathBuf, &'static KernelDump),
) -> Result<(), Box<dyn Error>> {
dump_launch_arguments(
@ -441,6 +500,7 @@ unsafe fn dump_pre_data(
module_file.write_all(kernel_dump.module_content.as_bytes())?;
dump_arguments(
kernelParams,
extra,
"pre",
&kernel_dump.name,
LAUNCH_COUNTER,
@ -449,8 +509,9 @@ unsafe fn dump_pre_data(
Ok(())
}
unsafe fn dump_arguments(
fn dump_arguments(
kernel_params: *mut *mut ::std::os::raw::c_void,
extra: *mut *mut ::std::os::raw::c_void,
prefix: &str,
kernel_name: &str,
counter: usize,
@ -467,14 +528,84 @@ unsafe fn dump_arguments(
fs::remove_dir_all(&dump_dir)?;
}
fs::create_dir_all(&dump_dir)?;
if kernel_params != ptr::null_mut() {
for (i, arg_len) in args.iter().enumerate() {
let dev_ptr = *(*kernel_params.add(i) as *mut usize);
match BUFFERS.iter().find(|(start, _)| *start == dev_ptr as usize) {
Some((start, len)) => {
let mut output = vec![0u8; *len];
let error =
cuda::cuMemcpyDtoH_v2(output.as_mut_ptr() as *mut _, CUdeviceptr(*start), *len);
unsafe { dump_argument_to_file(&dump_dir, i, *arg_len, *kernel_params.add(i))? };
}
} else {
let mut offset = 0;
let mut buffer_ptr = None;
let mut buffer_size = None;
loop {
match unsafe { *extra.add(offset) } {
CU_LAUNCH_PARAM_END => break,
CU_LAUNCH_PARAM_BUFFER_POINTER => {
buffer_ptr = Some(unsafe { *extra.add(offset + 1) as *mut u8 });
}
CU_LAUNCH_PARAM_BUFFER_SIZE => {
buffer_size = Some(unsafe { *(*extra.add(offset + 1) as *mut usize) });
}
_ => return Err("Malformed `extra` parameter to kernel launch")?,
}
offset += 2;
}
match (buffer_size, buffer_ptr) {
(Some(buffer_size), Some(buffer_ptr)) => {
let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
});
if buffer_size != sum_of_kernel_argument_sizes {
return Err("Malformed `extra` parameter to kernel launch")?;
}
let mut offset = 0;
for (i, arg_size) in args.iter().enumerate() {
let buffer_offset = round_up_to_multiple(offset, *arg_size);
unsafe {
dump_argument_to_file(
&dump_dir,
i,
*arg_size,
buffer_ptr.add(buffer_offset) as *const _,
)?
};
offset = buffer_offset + *arg_size;
}
}
_ => return Err("Malformed `extra` parameter to kernel launch")?,
}
}
Ok(())
}
/// Rounds `x` up to the nearest multiple of `multiple`.
///
/// Returns `x` unchanged when it is already a multiple, and also when
/// `multiple` is zero (there is nothing to align to, and dividing by zero
/// would panic). The remainder form avoids the intermediate `x + multiple - 1`,
/// which can overflow even when the rounded result fits in `usize`.
fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
    if multiple == 0 {
        return x;
    }
    match x % multiple {
        0 => x,
        remainder => x + (multiple - remainder),
    }
}
unsafe fn dump_argument_to_file(
dump_dir: &PathBuf,
i: usize,
arg_len: usize,
ptr: *const c_void,
) -> Result<(), Box<dyn Error>> {
// Don't check if arg_len == sizeof(void*): there are libraries
// which for some reason pass 32-bit pointers (4 bytes) in 8-byte arguments
match get_buffer_length(*(ptr as *mut usize)) {
Some((start, len, location)) => {
let mut output = vec![0u8; len];
let memcpy_fn = match location {
AllocLocation::Device => |src, dst: usize, len| {
let error = cuda::cuMemcpyDtoH(dst as *mut _, CUdeviceptr(src), len);
assert_eq!(error, CUresult::CUDA_SUCCESS);
},
AllocLocation::DeviceV2 => |src, dst: usize, len| {
let error = cuda::cuMemcpyDtoH_v2(dst as *mut _, CUdeviceptr(src), len);
assert_eq!(error, CUresult::CUDA_SUCCESS);
},
AllocLocation::Host => |src, dst: usize, len| {
ptr::copy_nonoverlapping(src as *mut u8, dst as *mut u8, len);
},
};
memcpy_fn(start, output.as_mut_ptr() as usize, len);
let mut path = dump_dir.clone();
path.push(format!("arg_{:03}.buffer", i));
let mut file = File::create(path)?;
@ -484,16 +615,28 @@ unsafe fn dump_arguments(
let mut path = dump_dir.clone();
path.push(format!("arg_{:03}", i));
let mut file = File::create(path)?;
file.write_all(slice::from_raw_parts(
*kernel_params.add(i) as *mut u8,
*arg_len,
))?;
}
file.write_all(slice::from_raw_parts(ptr as *mut u8, arg_len))?;
}
}
Ok(())
}
/// Finds the tracked allocation that contains the address `ptr`.
///
/// Takes the entry of `BUFFERS` with the greatest start address not above
/// `ptr`. If `ptr` lies before that allocation's end, returns
/// `(ptr, bytes from ptr to the end of the allocation, location)`;
/// otherwise returns `None`.
///
/// Panics if `BUFFERS` has not been initialised (it is `None`).
unsafe fn get_buffer_length(ptr: usize) -> Option<(usize, usize, AllocLocation)> {
    let tracked = BUFFERS.as_mut().unwrap();
    // Closest allocation starting at or below `ptr`; none means `ptr` is untracked.
    let (&start, &(len, location)) = tracked.range(..=ptr).next_back()?;
    let end = start + len;
    if ptr >= end {
        return None;
    }
    Some((ptr, end - ptr, location))
}
fn get_dump_dir() -> Result<PathBuf, Box<dyn Error>> {
let dir = env::var("ZLUDA_DUMP_DIR")?;
let mut main_dir = PathBuf::from(dir);

View File

@ -1,3 +1,4 @@
use std::os::windows::ffi::OsStrExt;
use std::path::Path;
use std::ptr;
use std::{env, ops::Deref};
@ -33,7 +34,7 @@ pub fn main_impl() -> Result<(), Box<dyn Error>> {
let injector_dir = injector_path.parent().unwrap();
let redirect_path = create_redirect_path(injector_dir);
let (mut inject_nvcuda_path, mut inject_nvml_path, cmd) =
create_inject_path(&args[1..], injector_dir);
create_inject_path(&args[1..], injector_dir)?;
let mut cmd_line = construct_command_line(cmd);
let mut startup_info = unsafe { mem::zeroed::<detours_sys::_STARTUPINFOW>() };
let mut proc_info = unsafe { mem::zeroed::<detours_sys::_PROCESS_INFORMATION>() };
@ -110,7 +111,7 @@ fn print_help_and_exit() -> ! {
{0} -- <EXE> [ARGS]...
{0} <DLL> -- <EXE> [ARGS]...
ARGS:
<DLL> DLL to ne injected instead of system nvcuda.dll, if not provided
<DLL> DLL to be injected instead of system nvcuda.dll, if not provided
will use nvcuda.dll from the directory where {0} is located
<EXE> Path to the executable to be injected with <DLL>
<ARGS>... Arguments that will be passed to <EXE>
@ -187,7 +188,7 @@ fn create_redirect_path(injector_dir: &Path) -> Vec<u8> {
fn create_inject_path<'a>(
args: &'a [String],
injector_dir: &Path,
) -> (Vec<u16>, Vec<u16>, &'a [String]) {
) -> std::io::Result<(Vec<u16>, Vec<u16>, &'a [String])> {
let injector_dir = injector_dir.to_path_buf();
let (nvcuda_path, unparsed_args) = if args.get(0).map(Deref::deref) == Some("--") {
(
@ -195,14 +196,13 @@ fn create_inject_path<'a>(
&args[1..],
)
} else if args.get(1).map(Deref::deref) == Some("--") {
let mut dll_path = args[0].encode_utf16().collect::<Vec<_>>();
dll_path.push(0);
let dll_path = make_absolute_and_encode(&args[0])?;
(dll_path, &args[2..])
} else {
print_help_and_exit()
};
let nvml_path = encode_file_in_directory_raw(injector_dir, ZLUDA_ML_DLL);
(nvcuda_path, nvml_path, unparsed_args)
Ok((nvcuda_path, nvml_path, unparsed_args))
}
fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16> {
@ -215,3 +215,16 @@ fn encode_file_in_directory_raw(mut dir: PathBuf, file: &'static str) -> Vec<u16
result.push(0);
result
}
/// Encodes `maybe_path` as a NUL-terminated UTF-16 string, first resolving it
/// against the current working directory when it is a relative path.
///
/// # Errors
///
/// Returns the error from `env::current_dir` if the working directory cannot
/// be determined (only consulted for relative paths).
fn make_absolute_and_encode(maybe_path: &str) -> std::io::Result<Vec<u16>> {
    let path = Path::new(maybe_path);
    let mut wide: Vec<u16> = if path.is_relative() {
        let absolute = env::current_dir()?.join(path);
        absolute.as_os_str().encode_wide().collect()
    } else {
        maybe_path.encode_utf16().collect()
    };
    // Trailing NUL terminator.
    wide.push(0);
    Ok(wide)
}