Make dumper compatible with older versions of CUDA

This commit is contained in:
Andrzej Janik
2021-04-10 23:01:01 +02:00
parent 8393dbd6e9
commit a39dda67d1
4 changed files with 171 additions and 69 deletions

View File

@ -75,7 +75,10 @@ unsafe extern "system" fn runtime_callback_hooks_fn1(ptr: *mut *mut usize, size:
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2]; static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
unsafe extern "system" fn runtime_callback_hooks_fn5(ptr: *mut *mut u8, size: *mut usize) -> *mut u8 { unsafe extern "system" fn runtime_callback_hooks_fn5(
ptr: *mut *mut u8,
size: *mut usize,
) -> *mut u8 {
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr(); *ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len(); *size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr(); return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
@ -93,7 +96,9 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry { VTableEntry {
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(), length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
}, },
VTableEntry { ptr: ptr::null() }, VTableEntry {
ptr: get_module_from_cubin as *const (),
},
VTableEntry { VTableEntry {
ptr: cudart_interface_fn1 as *const (), ptr: cudart_interface_fn1 as *const (),
}, },
@ -101,7 +106,7 @@ static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
VTableEntry { ptr: ptr::null() }, VTableEntry { ptr: ptr::null() },
VTableEntry { ptr: ptr::null() }, VTableEntry { ptr: ptr::null() },
VTableEntry { VTableEntry {
ptr: get_module_from_cubin as *const (), ptr: get_module_from_cubin_ext as *const (),
}, },
VTableEntry { VTableEntry {
ptr: cudart_interface_fn6 as *const (), ptr: cudart_interface_fn6 as *const (),
@ -198,14 +203,7 @@ struct FatbinFileHeader {
unsafe extern "system" fn get_module_from_cubin( unsafe extern "system" fn get_module_from_cubin(
result: *mut CUmodule, result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper, fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void,
ptr2: *mut c_void,
) -> CUresult { ) -> CUresult {
// Not sure what those two parameters are actually used for,
// they are somehow involved in __cudaRegisterHostVar
if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
return CUresult::CUDA_ERROR_NOT_SUPPORTED;
}
if result == ptr::null_mut() if result == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC || (*fatbinc_wrapper).magic != FATBINC_MAGIC
|| (*fatbinc_wrapper).version != FATBINC_VERSION || (*fatbinc_wrapper).version != FATBINC_VERSION
@ -248,6 +246,21 @@ unsafe extern "system" fn get_module_from_cubin(
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
} }
/// Four-argument variant of `get_module_from_cubin`.
///
/// Returns `CUDA_ERROR_NOT_SUPPORTED` when either extra pointer is non-null;
/// otherwise defers to `get_module_from_cubin` with the first two arguments.
unsafe extern "system" fn get_module_from_cubin_ext(
    result: *mut CUmodule,
    fatbinc_wrapper: *const FatbincWrapper,
    ptr1: *mut c_void,
    ptr2: *mut c_void,
) -> CUresult {
    // Not sure what those two parameters are actually used for,
    // they are somehow involved in __cudaRegisterHostVar
    if ptr1.is_null() && ptr2.is_null() {
        return get_module_from_cubin(result, fatbinc_wrapper);
    }
    CUresult::CUDA_ERROR_NOT_SUPPORTED
}
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> { unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file; let mut index = file;
let mut result = Vec::new(); let mut result = Vec::new();
@ -284,6 +297,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<
} }
real_decompressed_size => { real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize); decompressed_vec.truncate(real_decompressed_size as usize);
if decompressed_vec.last().copied().unwrap_or(1) != 0 {
decompressed_vec.push(0);
}
return Some(decompressed_vec); return Some(decompressed_vec);
} }
} }

View File

@ -75,13 +75,13 @@ pub static mut OVERRIDE_COMPUTE_CAPABILITY_MAJOR: Option<i32> = None;
pub struct ModuleDump { pub struct ModuleDump {
content: Rc<String>, content: Rc<String>,
kernels_args: HashMap<String, Vec<usize>>, kernels_args: Option<HashMap<String, Vec<usize>>>,
} }
pub struct KernelDump { pub struct KernelDump {
module_content: Rc<String>, module_content: Rc<String>,
name: String, name: String,
arguments: Vec<usize>, arguments: Option<Vec<usize>>,
} }
// We are doing dlopen here instead of just using LD_PRELOAD, // We are doing dlopen here instead of just using LD_PRELOAD,
@ -95,7 +95,7 @@ pub unsafe fn init_libcuda_handle() {
Ok(kernel_filter) => match Regex::new(&kernel_filter) { Ok(kernel_filter) => match Regex::new(&kernel_filter) {
Ok(r) => KERNEL_PATTERN = Some(r), Ok(r) => KERNEL_PATTERN = Some(r),
Err(err) => { Err(err) => {
eprintln!("[ZLUDA_DUMP] Error parsing ZLUDA_DUMP_KERNEL: {}", err); os_log!("Error parsing ZLUDA_DUMP_KERNEL: {}", err);
} }
}, },
Err(_) => (), Err(_) => (),
@ -104,15 +104,15 @@ pub unsafe fn init_libcuda_handle() {
Ok(cc_override) => match str::parse::<i32>(&cc_override) { Ok(cc_override) => match str::parse::<i32>(&cc_override) {
Ok(ver) => OVERRIDE_COMPUTE_CAPABILITY_MAJOR = Some(ver), Ok(ver) => OVERRIDE_COMPUTE_CAPABILITY_MAJOR = Some(ver),
Err(err) => { Err(err) => {
eprintln!( os_log!(
"[ZLUDA_DUMP] Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}", "Error parsing ZLUDA_OVERRIDE_COMPUTE_CAPABILITY_MAJOR: {}",
err err
); );
} }
}, },
Err(_) => (), Err(_) => (),
} }
eprintln!("[ZLUDA_DUMP] Initialized"); os_log!("Initialized");
} }
} }
@ -131,51 +131,45 @@ pub unsafe fn cuModuleLoadData(
unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) { unsafe fn record_module_image_raw(module: CUmodule, raw_image: *const ::std::os::raw::c_void) {
if *(raw_image as *const u32) == 0x464c457f { if *(raw_image as *const u32) == 0x464c457f {
eprintln!("[ZLUDA_DUMP] Unsupported ELF module: {:?}", raw_image); os_log!("Unsupported ELF module: {:?}", raw_image);
return; return;
} }
let image = to_str(raw_image); let image = to_str(raw_image);
match image { match image {
None => eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", raw_image), None => os_log!("Malformed module image: {:?}", raw_image),
Some(image) => record_module_image(module, image), Some(image) => record_module_image(module, image),
}; };
} }
unsafe fn record_module_image(module: CUmodule, image: &str) { unsafe fn record_module_image(module: CUmodule, image: &str) {
if !image.contains(&".address_size") { if !image.contains(&".address_size") {
eprintln!("[ZLUDA_DUMP] Malformed module image: {:?}", module) os_log!("Malformed module image: {:?}", module)
} else { } else {
let mut errors = Vec::new(); let mut errors = Vec::new();
let ast = ptx::ModuleParser::new().parse(&mut errors, image); let ast = ptx::ModuleParser::new().parse(&mut errors, image);
match (&*errors, ast) { let kernels_args = match (&*errors, ast) {
(&[], Ok(ast)) => { (&[], Ok(ast)) => {
let kernels_args = ast let kernels_args = ast
.directives .directives
.iter() .iter()
.filter_map(directive_to_kernel) .filter_map(directive_to_kernel)
.collect::<HashMap<_, _>>(); .collect::<HashMap<_, _>>();
let modules = MODULES.get_or_insert_with(|| HashMap::new()); Some(kernels_args)
modules.insert(
module,
ModuleDump {
content: Rc::new(image.to_string()),
kernels_args,
},
);
} }
(errs, ast) => { (_, _) => {
let err_string = errs // Don't print errors - it's usually too verbose to be useful
.iter() os_log!("Errors when parsing module: {:?}", module);
.map(|e| format!("{:?}", e)) None
.chain(ast.err().iter().map(|e| format!("{:?}", e)))
.collect::<Vec<_>>()
.join("\n");
eprintln!(
"[ZLUDA_DUMP] Errors when parsing module:\n---ERRORS---\n{}\n---MODULE---\n{}",
err_string, image
);
} }
} };
let modules = MODULES.get_or_insert_with(|| HashMap::new());
modules.insert(
module,
ModuleDump {
content: Rc::new(image.to_string()),
kernels_args,
},
);
} }
} }
@ -248,27 +242,32 @@ unsafe fn cuModuleGetFunction(
if let Some(modules) = &MODULES { if let Some(modules) = &MODULES {
if let Some(module_dump) = modules.get(&hmod) { if let Some(module_dump) = modules.get(&hmod) {
if let Some(kernel) = to_str(name) { if let Some(kernel) = to_str(name) {
if let Some(args) = module_dump.kernels_args.get(kernel) { let kernel_args = if let Some(kernels) = &module_dump.kernels_args {
let kernel_args = KERNELS.get_or_insert_with(|| HashMap::new()); if let Some(args) = kernels.get(kernel) {
kernel_args.insert( Some(args.clone())
*hfunc, } else {
KernelDump { None
module_content: module_dump.content.clone(), }
name: kernel.to_string(),
arguments: args.clone(),
},
);
} else { } else {
eprintln!("[ZLUDA_DUMP] Unknown kernel: {}", kernel); None
} };
let kernel_args_map = KERNELS.get_or_insert_with(|| HashMap::new());
kernel_args_map.insert(
*hfunc,
KernelDump {
module_content: module_dump.content.clone(),
name: kernel.to_string(),
arguments: kernel_args,
},
);
} else { } else {
eprintln!("[ZLUDA_DUMP] Unknown kernel name at: {:?}", hfunc); os_log!("Malformed name at: {:?}", hfunc);
} }
} else { } else {
eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod); os_log!("Unknown module: {:?}", hmod);
} }
} else { } else {
eprintln!("[ZLUDA_DUMP] Unknown module: {:?}", hmod); os_log!("Unknown module: {:?}", hmod);
} }
CUresult::CUDA_SUCCESS CUresult::CUDA_SUCCESS
} }
@ -317,7 +316,7 @@ pub unsafe fn cuLaunchKernel(
let dump_env = match create_dump_dir(f, LAUNCH_COUNTER) { let dump_env = match create_dump_dir(f, LAUNCH_COUNTER) {
Ok(dump_env) => dump_env, Ok(dump_env) => dump_env,
Err(err) => { Err(err) => {
eprintln!("[ZLUDA_DUMP] {:#?}", err); os_log!("Error when creating the dump directory: {}", err);
None None
} }
}; };
@ -333,7 +332,7 @@ pub unsafe fn cuLaunchKernel(
kernelParams, kernelParams,
dump_env, dump_env,
) )
.unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err)); .unwrap_or_else(|err| os_log!("{}", err));
}; };
error = cont( error = cont(
f, f,
@ -357,9 +356,9 @@ pub unsafe fn cuLaunchKernel(
"post", "post",
&kernel_dump.name, &kernel_dump.name,
LAUNCH_COUNTER, LAUNCH_COUNTER,
&kernel_dump.arguments, kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
) )
.unwrap_or_else(|err| eprintln!("[ZLUDA_DUMP] {:#?}", err)); .unwrap_or_else(|err| os_log!("{}", err));
} }
LAUNCH_COUNTER += 1; LAUNCH_COUNTER += 1;
CUresult::CUDA_SUCCESS CUresult::CUDA_SUCCESS
@ -445,7 +444,7 @@ unsafe fn dump_pre_data(
"pre", "pre",
&kernel_dump.name, &kernel_dump.name,
LAUNCH_COUNTER, LAUNCH_COUNTER,
&kernel_dump.arguments, kernel_dump.arguments.as_ref().map(|vec| &vec[..]),
)?; )?;
Ok(()) Ok(())
} }
@ -455,8 +454,12 @@ unsafe fn dump_arguments(
prefix: &str, prefix: &str,
kernel_name: &str, kernel_name: &str,
counter: usize, counter: usize,
args: &[usize], args: Option<&[usize]>,
) -> Result<(), Box<dyn Error>> { ) -> Result<(), Box<dyn Error>> {
let args = match args {
None => return Ok(()),
Some(a) => a,
};
let mut dump_dir = get_dump_dir()?; let mut dump_dir = get_dump_dir()?;
dump_dir.push(format!("{:04}_{}", counter, kernel_name)); dump_dir.push(format!("{:04}_{}", counter, kernel_name));
dump_dir.push(prefix); dump_dir.push(prefix);
@ -508,9 +511,16 @@ const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
], ],
}; };
const GET_MODULE_OFFSET: usize = 6;
static mut CUDART_INTERFACE_VTABLE: Vec<*const c_void> = Vec::new(); static mut CUDART_INTERFACE_VTABLE: Vec<*const c_void> = Vec::new();
const GET_MODULE_FROM_CUBIN_OFFSET: usize = 1;
const GET_MODULE_FROM_CUBIN_EXT_OFFSET: usize = 6;
static mut ORIGINAL_GET_MODULE_FROM_CUBIN: Option< static mut ORIGINAL_GET_MODULE_FROM_CUBIN: Option<
unsafe extern "system" fn(
result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
) -> CUresult,
> = None;
static mut ORIGINAL_GET_MODULE_FROM_CUBIN_EXT: Option<
unsafe extern "system" fn( unsafe extern "system" fn(
result: *mut CUmodule, result: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper, fatbinc_wrapper: *const FatbincWrapper,
@ -539,16 +549,23 @@ pub unsafe fn cuGetExportTable(
CUDART_INTERFACE_VTABLE.as_mut_ptr(), CUDART_INTERFACE_VTABLE.as_mut_ptr(),
len, len,
); );
if GET_MODULE_OFFSET >= len { if GET_MODULE_FROM_CUBIN_EXT_OFFSET >= len {
return CUresult::CUDA_ERROR_UNKNOWN; return CUresult::CUDA_ERROR_UNKNOWN;
} }
ORIGINAL_GET_MODULE_FROM_CUBIN = ORIGINAL_GET_MODULE_FROM_CUBIN =
mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET]); mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET]);
CUDART_INTERFACE_VTABLE[GET_MODULE_OFFSET] = get_module_from_cubin as *const _; CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_OFFSET] =
get_module_from_cubin as *const _;
ORIGINAL_GET_MODULE_FROM_CUBIN_EXT =
mem::transmute(CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET]);
CUDART_INTERFACE_VTABLE[GET_MODULE_FROM_CUBIN_EXT_OFFSET] =
get_module_from_cubin_ext as *const _;
} }
*ppExportTable = CUDART_INTERFACE_VTABLE.as_ptr() as *const _; *ppExportTable = CUDART_INTERFACE_VTABLE.as_ptr() as *const _;
return CUresult::CUDA_SUCCESS; return CUresult::CUDA_SUCCESS;
} else { } else {
let guid = (*pExportTableId).bytes;
os_log!("Unsupported export table id: {{{:02X}{:02X}{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}-{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}}}", guid[0], guid[1], guid[2], guid[3], guid[4], guid[5], guid[6], guid[7], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15]);
cont(ppExportTable, pExportTableId) cont(ppExportTable, pExportTableId)
} }
} }
@ -598,11 +615,10 @@ struct FatbinFileHeader {
uncompressed_payload: c_ulong, uncompressed_payload: c_ulong,
} }
unsafe extern "system" fn get_module_from_cubin( unsafe fn get_module_from_cubin_impl(
module: *mut CUmodule, module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper, fatbinc_wrapper: *const FatbincWrapper,
ptr1: *mut c_void, get_module_base: impl FnOnce() -> CUresult,
ptr2: *mut c_void,
) -> CUresult { ) -> CUresult {
if module == ptr::null_mut() if module == ptr::null_mut()
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC || (*fatbinc_wrapper).magic != FATBINC_MAGIC
@ -628,7 +644,7 @@ unsafe extern "system" fn get_module_from_cubin(
} }
}; };
} }
let result = ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap()(module, fatbinc_wrapper, ptr1, ptr2); let result = get_module_base();
if result != CUresult::CUDA_SUCCESS { if result != CUresult::CUDA_SUCCESS {
return result; return result;
} }
@ -644,6 +660,26 @@ unsafe extern "system" fn get_module_from_cubin(
result result
} }
/// Two-argument hook for the export-table entry at `GET_MODULE_FROM_CUBIN_OFFSET`.
///
/// Delegates to `get_module_from_cubin_impl`, handing it a closure that calls
/// the function pointer saved in `ORIGINAL_GET_MODULE_FROM_CUBIN`.
unsafe extern "system" fn get_module_from_cubin(
    module: *mut CUmodule,
    fatbinc_wrapper: *const FatbincWrapper,
) -> CUresult {
    // The saved pointer is unwrapped only when the shared implementation
    // actually invokes the closure.
    let call_original = || {
        let original = ORIGINAL_GET_MODULE_FROM_CUBIN.unwrap();
        original(module, fatbinc_wrapper)
    };
    get_module_from_cubin_impl(module, fatbinc_wrapper, call_original)
}
/// Four-argument hook for the export-table entry at
/// `GET_MODULE_FROM_CUBIN_EXT_OFFSET`.
///
/// Delegates to `get_module_from_cubin_impl`, handing it a closure that calls
/// the function pointer saved in `ORIGINAL_GET_MODULE_FROM_CUBIN_EXT` with all
/// four arguments unchanged.
unsafe extern "system" fn get_module_from_cubin_ext(
    module: *mut CUmodule,
    fatbinc_wrapper: *const FatbincWrapper,
    ptr1: *mut c_void,
    ptr2: *mut c_void,
) -> CUresult {
    // The saved pointer is unwrapped only when the shared implementation
    // actually invokes the closure.
    let call_original = || {
        let original = ORIGINAL_GET_MODULE_FROM_CUBIN_EXT.unwrap();
        original(module, fatbinc_wrapper, ptr1, ptr2)
    };
    get_module_from_cubin_impl(module, fatbinc_wrapper, call_original)
}
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> { unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
let mut index = file; let mut index = file;
let mut result = Vec::new(); let mut result = Vec::new();
@ -680,6 +716,9 @@ unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<
} }
real_decompressed_size => { real_decompressed_size => {
decompressed_vec.truncate(real_decompressed_size as usize); decompressed_vec.truncate(real_decompressed_size as usize);
if decompressed_vec.last().copied().unwrap_or(1) != 0 {
decompressed_vec.push(0);
}
return Some(decompressed_vec); return Some(decompressed_vec);
} }
} }

View File

@ -2,6 +2,8 @@ use std::ffi::{c_void, CStr};
const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0"; const NVCUDA_DEFAULT_PATH: &'static [u8] = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
pub fn init() {}
pub unsafe fn load_cuda_library() -> *mut c_void { pub unsafe fn load_cuda_library() -> *mut c_void {
libc::dlopen( libc::dlopen(
NVCUDA_DEFAULT_PATH.as_ptr() as *const _, NVCUDA_DEFAULT_PATH.as_ptr() as *const _,
@ -12,3 +14,17 @@ pub unsafe fn load_cuda_library() -> *mut c_void {
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void { pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
libc::dlsym(handle, func.as_ptr() as *const _) libc::dlsym(handle, func.as_ptr() as *const _)
} }
/// Writes one log line to stderr, prefixed with `[ZLUDA_DUMP] `.
///
/// Accepts a format string literal, optionally followed by one or more
/// comma-separated arguments, using the same syntax as `eprintln!`. The
/// expansion is a block evaluating to `()`, so the macro can be used both as
/// a statement and in expression position (e.g. as a match arm).
///
/// Call sites pass the bare message only, so the prefix is added here.
#[macro_export]
macro_rules! os_log {
    ($format:tt) => {
        {
            eprintln!("[ZLUDA_DUMP] {}", format_args!($format));
        }
    };
    ($format:tt, $($obj: expr),+) => {
        {
            eprintln!("[ZLUDA_DUMP] {}", format_args!($format, $($obj,)+));
        }
    };
}

View File

@ -5,9 +5,11 @@ use std::{
ptr, ptr,
}; };
use std::os::windows::io::AsRawHandle;
use wchar::wch_c; use wchar::wch_c;
use winapi::{ use winapi::{
shared::minwindef::HMODULE, shared::minwindef::HMODULE,
um::debugapi::OutputDebugStringA,
um::libloaderapi::{GetProcAddress, LoadLibraryW}, um::libloaderapi::{GetProcAddress, LoadLibraryW},
}; };
@ -66,3 +68,32 @@ unsafe fn get_non_detoured_load_library(
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void { pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
GetProcAddress(handle as *mut _, func.as_ptr()) as *mut _ GetProcAddress(handle as *mut _, func.as_ptr()) as *mut _
} }
/// Formats a message and hands it to `__log_impl` in this crate's `os` module.
///
/// Accepts a format string literal, optionally followed by one or more
/// comma-separated arguments, using the same syntax as `format!`.
///
/// The helper is named through `$crate` so that this exported macro resolves
/// it in the defining crate regardless of where the macro is expanded.
#[macro_export]
macro_rules! os_log {
    ($format:tt) => {
        {
            $crate::os::__log_impl(format!($format));
        }
    };
    ($format:tt, $($obj: expr),+) => {
        {
            $crate::os::__log_impl(format!($format, $($obj,)+));
        }
    };
}
/// Backend of the `os_log!` macro: emits `s` prefixed with `[ZLUDA_DUMP] `.
///
/// If the stderr raw handle is non-null, the message goes to stderr as a
/// single line. Otherwise it is passed to `OutputDebugStringA` as a
/// newline- and NUL-terminated byte string.
pub fn __log_impl(s: String) {
    const PREFIX: &str = "[ZLUDA_DUMP] ";
    let log_to_stderr = std::io::stderr().as_raw_handle() != ptr::null_mut();
    if log_to_stderr {
        // `eprintln!` already terminates the line; an explicit "\n" here
        // would leave a blank line after every message.
        eprintln!("{}{}", PREFIX, s);
    } else {
        // +2 for the trailing "\n\0" that OutputDebugStringA needs.
        let mut win_str = String::with_capacity(PREFIX.len() + s.len() + 2);
        win_str.push_str(PREFIX);
        win_str.push_str(&s);
        win_str.push_str("\n\0");
        unsafe { OutputDebugStringA(win_str.as_ptr() as *const _) };
    }
}