Make dumper compatible with older versions of CUDA

This commit is contained in:
Andrzej Janik
2021-04-10 23:01:01 +02:00
parent 8393dbd6e9
commit a39dda67d1
4 changed files with 171 additions and 69 deletions

View File

@ -75,7 +75,10 @@ unsafe extern "system" fn runtime_callback_hooks_fn1(ptr: *mut *mut usize, size:
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
unsafe extern "system" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 {
unsafe extern "system" fn runtime_callback_hooks_fn5(
ptr: *mut *mut u8,
size: *mut usize,
) -> *mut u8 {
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
@ -93,7 +96,9 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry {
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
},
VTableEntry { ptr: ptr::null() },
VTableEntry {
ptr: get_module_from_cubin as *const (),
},
VTableEntry {
ptr: cudart_interface_fn1 as *const (),
},
@ -101,7 +106,7 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() },
VTableEntry {
ptr: get_module_from_cubin as *const (),
ptr: get_module_from_cubin_ext as *const (),
},
VTableEntry {
ptr: cudart_interface_fn6 as *const (),
@ -198,14 +203,7 @@ struct FatbinFileHeader {
unsafe extern "system" fn get_module_from_cubin(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
) -> CUresult {
// Not sure what those two parameters are actually used for,
// they are somehow involved in __cudaRegisterHostVar
if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
return CUresult::CUDA_ERROR_NOT_SUPPORTED;
}
if result == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
|| (*fatbinc_wrapper).version != FATBINC_VERSION
@ -248,6 +246,21 @@ unsafe extern "system" fn get_module_from_cubin(
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
}
unsafe extern "system" fn get_module_from_cubin_ext(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
) -> CUresult {
// Not sure what those two parameters are actually used for,
// they are somehow involved in __cudaRegisterHostVar
if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
CUresult::CUDA_ERROR_NOT_SUPPORTED
} else {
get_module_from_cubin(result, fatbinc_wrapper)
}
}
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file;
let mut result = Vec::new();
@ -284,6 +297,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<
}
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
if decompressed_vec.last().copied().unwrap_or(1) != 0 {
decompressed_vec.push(0);
}
return Some(decompressed_vec);
}
}

View File

@ -75,13 +75,13 @@ pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
pub struct ModuleDump {
content: Rc<String>,
kernels_args: HashMap<String, Vec<usize>>,
kernels_args: Option<HashMap<String, Vec<usize>>>,
}
pub struct KernelDump {
module_content: Rc<String>,
name: String,
arguments: Vec<usize>,
arguments: Option<Vec<usize>>,
}
// We are doing dlopen here instead of just using LD_PRELOAD,
@ -95,7 +95,7 @@ pub unsafe fn init_libcuda_handle() {
Ok(kernel_filter) => match Regex::new(&kernel_filter) {
Ok(r) => KERNEL_PATTERN = Some(r),
Err(err) => {
eprintln!("[ZLUDA_DUMP] Error parsing ZLUDA_DUMP_KERNEL: {}", err);
os_log!("Error parsing ZLUDA_DUMP_KERNEL: {}", err);
}
},
Err(_) => (),
@ -104,15 +104,15 @@ pub unsafe fn init_libcuda_handle() {
Ok(cc_override) => match str::parse::<i32>(&cc_override) {
Ok(ver) => OVERRIDE_COMPUTE_CAPABILITY_MAJOR = Some(ver),
Err(err) => {
eprintln!(
"[ZLUDA_DUMP] Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}",
os_log!(
"Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}",
err
);
}
},
Err(_) => (),
}
eprintln!("[ZLUDA_DUMP] Initialized");
os_log!("Initialized");
}
}
@ -131,29 +131,37 @@ pub unsafe fn cuModuleLoadData(
unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
if *(raw_image as *const u32) == 0x464c457f {
eprintln!("[ZLUDA_DUMP] Unsupported ELF module: {:?}", raw_image);
os_log!("Unsupported ELF module: {:?}", raw_image);
return;
}
let image = to_str(raw_image);
match image {
None => eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", raw_image),
None => os_log!("Malformed module image: {:?}", raw_image),
Some(image) => record_module_image(module, image),
};
}
unsafe fn record_module_image(module: CUmodule, image: &str) {
if !image.contains(&".address_size") {
eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", module)
os_log!("Malformed module image: {:?}", module)
} else {
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, image);
match (&*errors, ast) {
let kernels_args = match (&*errors, ast) {
(&[], Ok(ast)) => {
let kernels_args = ast
.directives
.iter()
.filter_map(directive_to_kernel)
.collect::<HashMap<_, _>>();
Some(kernels_args)
}
(_, _) => {
// Don't print errors - it's usually too verbose to be useful
os_log!("Errors when parsing module: {:?}", module);
None
}
};
let modules = MODULES.get_or_insert_with(|| HashMap::new());
modules.insert(
module,
@ -163,20 +171,6 @@ unsafe fn record_module_image(module: CUmodule, image: &str) {
},
);
}
(errs, ast) => {
let err_string = errs
.iter()
.map(|e| format!("{:?}", e))
.chain(ast.err().iter().map(|e| format!("{:?}", e)))
.collect::<Vec<_>>()
.join("\n");
eprintln!(
"[ZLUDA_DUMP] Errors when parsing module:\n---ERRORS---\n{}\n---MODULE---\n{}",
err_string, image
);
}
}
}
}
unsafe fn to_str<T>(image: *const T) -> Option<&'static str> {
@ -248,27 +242,32 @@ unsafe fn cuModuleGetFunction(
if let Some(modules) = &MODULES {
if let Some(module_dump) = modules.get(&hmod) {
if let Some(kernel) = to_str(name) {
if let Some(args) = module_dump.kernels_args.get(kernel) {
let kernel_args = KERNELS.get_or_insert_with(|| HashMap::new());
kernel_args.insert(
let kernel_args = if let Some(kernels) = &module_dump.kernels_args {
if let Some(args) = kernels.get(kernel) {
Some(args.clone())
} else {
None
}
} else {
None
};
let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
kernel_args_map.insert(
*hfunc,
KernelDump {
module_content: module_dump.content.clone(),
name: kernel.to_string(),
arguments: args.clone(),
arguments: kernel_args,
},
);
} else {
eprintln!("[ZLUDA_DUMP] Unknown kernel: {}", kernel);
os_log!("Malformed name at: {:?}", hfunc);
}
} else {
eprintln!("[ZLUDA_DUMP] Unknown kernel name at: {:?}", hfunc);
os_log!("Unknown module: {:?}", hmod);
}
} else {
eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod);
}
} else {
eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod);
os_log!("Unknown module: {:?}", hmod);
}
CUresult::CUDA_SUCCESS
}
@ -317,7 +316,7 @@ pub unsafe fn cuLaunchKernel(
let dump_env = match create_dump_dir(f, LAUNCH_COUNTER) {
Ok(dump_env) => dump_env,
Err(err) => {
eprintln!("[ZLUDA_DUMP] {:#?}", err);
os_log!("Error when creating the dump directory: {}", err);
None
}
};
@ -333,7 +332,7 @@ pub unsafe fn cuLaunchKernel(
kernelParams,
dump_env,
)
.unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err));
.unwrap_or_else(|err| os_log!("{}", err));
};
error = cont(
f,
@ -357,9 +356,9 @@ pub unsafe fn cuLaunchKernel(
"post",
&kernel_dump.name,
LAUNCH_COUNTER,
&kernel_dump.arguments,
kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
)
.unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err));
.unwrap_or_else(|err| os_log!("{}", err));
}
LAUNCH_COUNTER += 1;
CUresult::CUDA_SUCCESS
@ -445,7 +444,7 @@ unsafe fn dump_pre_data(
"pre",
&kernel_dump.name,
LAUNCH_COUNTER,
&kernel_dump.arguments,
kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
)?;
Ok(())
}
@ -455,8 +454,12 @@ unsafe fn dump_arguments(
prefix: &str,
kernel_name: &str,
counter: usize,
args: &[usize],
args: Option<&[usize]>,
) -> Result<(), Box<dyn Error>> {
let args = match args {
None => return Ok(()),
Some(a) => a,
};
let mut dump_dir = get_dump_dir()?;
dump_dir.push(format!("{:04}_{}", counter, kernel_name));
dump_dir.push(prefix);
@ -508,9 +511,16 @@ const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
],
};
const GET_MODULE_OFFSET: usize = 6;
static mut CUDART_INTERFACE_VTABLE: Vec<*const c_void> = Vec::new();
const GET_MODULE_FROM_CUBIN_OFFSET: usize = 1;
const GET_MODULE_FROM_CUBIN_EXT_OFFSET: usize = 6;
static mut ORIGINAL_GET_MODULE_FROM_CUBIN: Option<
unsafe extern "system" fn(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
) -> CUresult,
> = None;
static mut ORIGINAL_GET_MODULE_FROM_CUBIN_EXT: Option<
unsafe extern "system" fn(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
@ -539,16 +549,23 @@ pub unsafe fn cuGetExportTable(
CUDART_INTERFACE_VTABLE.as_mut_ptr(),
len,
);
if GET_MODULE_OFFSET >= len {
if GET_MODULE_FROM_CUBIN_EXT_OFFSET >= len {
return CUresult::CUDA_ERROR_UNKNOWN;
}
ORIGINAL_GET_MODULE_FROM_CUBIN =
mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET]);
CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET] = get_module_from_cubin as *const _;
mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET]);
CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET] =
get_module_from_cubin as *const _;
ORIGINAL_GET_MODULE_FROM_CUBIN_EXT =
mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET]);
CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET] =
get_module_from_cubin_ext as *const _;
}
*ppExportTable = CUDART_INTERFACE_VTABLE.as_ptr() as *const _;
return CUresult::CUDA_SUCCESS;
} else {
let guid = (*pExportTableId).bytes;
os_log!("Unsupported export table id: {{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]);
cont(ppExportTable, pExportTableId)
}
}
@ -598,11 +615,10 @@ struct FatbinFileHeader {
uncompressed_payload: c_ulong,
}
unsafe extern "system" fn get_module_from_cubin(
unsafe fn get_module_from_cubin_impl(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
get_module_base: impl FnOnce() -> CUresult,
) -> CUresult {
if module == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
@ -628,7 +644,7 @@ unsafe extern "system" fn get_module_from_cubin(
}
};
}
let result = ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper, ptr1, ptr2);
let result = get_module_base();
if result != CUresult::CUDA_SUCCESS {
return result;
}
@ -644,6 +660,26 @@ unsafe extern "system" fn get_module_from_cubin(
result
}
unsafe extern "system" fn get_module_from_cubin(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
) -> CUresult {
get_module_from_cubin_impl(module, fatbinc_wrapper, || {
ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper)
})
}
unsafe extern "system" fn get_module_from_cubin_ext(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
) -> CUresult {
get_module_from_cubin_impl(module, fatbinc_wrapper, || {
ORIGINAL_GET_MODULE_FROM_CUBIN_EXT.unwrap()(module, fatbinc_wrapper, ptr1, ptr2)
})
}
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file;
let mut result = Vec::new();
@ -680,6 +716,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<
}
real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize);
if decompressed_vec.last().copied().unwrap_or(1) != 0 {
decompressed_vec.push(0);
}
return Some(decompressed_vec);
}
}

View File

@ -2,6 +2,8 @@ use std::ffi::{c_void, CStr};
const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
pub fn init() {}
pub unsafe fn load_cuda_library() -> *mut c_void {
libc::dlopen(
NVCUDA_DEFAULT_PATH.as_ptr() as *const _,
@ -12,3 +14,17 @@ pub unsafe fn load_cuda_library() -> *mut c_void {
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
libc::dlsym(handle, func.as_ptr() as *const _)
}
/// Logs a formatted diagnostic line to stderr, prefixed with `[ZLUDA_DUMP] `.
///
/// Takes a format string literal followed by optional arguments, like
/// `eprintln!`; a trailing comma is accepted. The prefix is added here so call
/// sites pass only the message itself.
#[macro_export]
macro_rules! os_log {
    ($format:tt $(, $obj:expr)* $(,)?) => {
        {
            // `format_args!` keeps the caller's literal as the format string
            // while the prefix is written by the outer `eprintln!`.
            eprintln!("[ZLUDA_DUMP] {}", format_args!($format $(, $obj)*));
        }
    };
}

View File

@ -5,9 +5,11 @@ use std::{
ptr,
};
use std::os::windows::io::AsRawHandle;
use wchar::wch_c;
use winapi::{
shared::minwindef::HMODULE,
um::debugapi::OutputDebugStringA,
um::libloaderapi::{GetProcAddress, LoadLibraryW},
};
@ -66,3 +68,32 @@ unsafe fn get_non_detoured_load_library(
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
GetProcAddress(handle as *mut _, func.as_ptr()) as *mut _
}
/// Formats a diagnostic message and hands it to `os::__log_impl`.
///
/// Takes a format string literal followed by optional arguments, like
/// `format!`; a trailing comma is accepted. The implementation function is
/// named through `$crate`, so the expansion does not import anything into the
/// caller's scope and resolves the same way from any module.
#[macro_export]
macro_rules! os_log {
    ($format:tt $(, $obj:expr)* $(,)?) => {
        {
            $crate::os::__log_impl(format!($format $(, $obj)*));
        }
    };
}
/// Writes one log line prefixed with `[ZLUDA_DUMP] `.
///
/// If the process has a non-null stderr handle the line goes to stderr;
/// otherwise it is passed to `OutputDebugStringA`. Both paths end the message
/// with exactly one newline.
pub fn __log_impl(s: String) {
    let log_to_stderr = std::io::stderr().as_raw_handle() != ptr::null_mut();
    if log_to_stderr {
        // `eprintln!` already appends the line terminator.
        eprintln!("[ZLUDA_DUMP] {}", s);
    } else {
        // Prefix + message + "\n\0": `OutputDebugStringA` takes a
        // NUL-terminated string.
        let mut win_str = String::with_capacity("[ZLUDA_DUMP] ".len() + s.len() + 2);
        win_str.push_str("[ZLUDA_DUMP] ");
        win_str.push_str(&s);
        win_str.push_str("\n\0");
        unsafe { OutputDebugStringA(win_str.as_ptr() as *const _) };
    }
}