Address review comments

This commit is contained in:
Andrzej Janik
2025-08-01 00:01:46 +00:00
parent 117cb08341
commit b01cc29f7c
9 changed files with 64 additions and 50 deletions

1
Cargo.lock generated
View File

@ -1942,6 +1942,7 @@ dependencies = [
"format",
"goblin",
"libc",
"libloading",
"parking_lot",
"paste",
"ptx",

View File

@ -15,6 +15,6 @@ unwrap_or = "1.0.1"
linux_only = true
linux_symlinks = [
"zluda_preload",
"dump/zluda_preload",
"dump_nvidia/zluda_preload",
"trace/zluda_preload",
"trace_nvidia/zluda_preload",
]

View File

@ -2,7 +2,7 @@ This crate is a last resort Linux-specific solution.
Most of the time we can inject ourselves into a process by having users
set `LD_LIBRARY_PATH`.
Unfortunately, there is software out there which dynamically links to CUDA and
and CUDA performance libraries using RPATH. On Linux, dynamic linker operates
CUDA performance libraries using RPATH. On Linux, the dynamic linker operates
using approximately this algorithm:
* If path contains `/` treat the name as a (possibly relative) path and just use it
* Otherwise return the first that succeeds:

View File

@ -7,6 +7,8 @@ use std::{
};
use unwrap_or::unwrap_some_or;
// Definition taken from the `libc` crate:
// https://github.com/rust-lang/libc/blob/cf82fdf3f22ccfa98ba120efc50d5f39ab2d52ff/src/unix/linux_like/linux/mod.rs#L2682
const RTLD_NEXT: *mut c_void = -1isize as _;
unsafe extern "C" {
@ -39,13 +41,18 @@ static FILES_FOR_REDIRECT: [&'static str; 14] = [
"libnvidia-ml.so.1",
];
static GLOBALS: LazyLock<(
Option<unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult>,
Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
)> = LazyLock::new(|| {
/// Global state, caching some computations that would otherwise be repeated on
/// every `dlopen` call.
struct GlobalState {
/// The original `dlopen` implementation from libdl.
/// NOTE(review): presumably `None` when the symbol lookup fails - confirm
/// against the initializer of the global.
dlopen_next: Option<unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult>,
/// The full paths of the file names from `FILES_FOR_REDIRECT` that will be used
/// for redirection; the array has one slot per entry of `FILES_FOR_REDIRECT`.
/// `None` if the paths could not be computed.
replacement_paths: Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
}
static GLOBAL_STATE: LazyLock<GlobalState> = LazyLock::new(|| {
let dlopen_next = unsafe { mem::transmute(dlsym(RTLD_NEXT, c"dlopen".as_ptr())) };
let mut self_dlinfo = unsafe { mem::zeroed::<DLInfo>() };
let self_dir = if unsafe { dladdr(dlopen as _, &mut self_dlinfo) } != 0 {
let replacement_paths = if unsafe { dladdr(dlopen as _, &mut self_dlinfo) } != 0 {
unsafe { CStr::from_ptr(self_dlinfo.dli_fname) }
.to_str()
.ok()
@ -63,7 +70,10 @@ static GLOBALS: LazyLock<(
} else {
None
};
(dlopen_next, self_dir)
GlobalState {
dlopen_next,
replacement_paths,
}
});
pub const RTLD_GLOBAL: c_int = 0x100;
@ -71,9 +81,18 @@ pub const RTLD_LAZY: c_int = 1;
#[ctor::ctor]
unsafe fn ctor() {
let (dlopen_next, replacement_paths) = &*GLOBALS;
let GlobalState {
dlopen_next,
replacement_paths,
} = &*GLOBAL_STATE;
let dlopen_next = unwrap_some_or!(dlopen_next, return);
let replacement_paths = unwrap_some_or!(replacement_paths, return);
// We preload the files we want to redirect, because:
// * We don't control dynamic linking when loading dependencies. We hijack
// dlopen, but that only works if the dependency has been explicitly
// loaded with dlopen. It does not intercept dependencies that the
// dynamic linker loads implicitly.
// * The first thing the dynamic linker does is check whether the file is
// already loaded.
for replacement in replacement_paths.into_iter() {
dlopen_next(replacement.as_ptr().cast(), RTLD_GLOBAL | RTLD_LAZY).ok();
}
@ -87,7 +106,10 @@ const _: fn() = || {
#[no_mangle]
unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenResult {
let (dlopen_next, replacement_paths) = &*GLOBALS;
let GlobalState {
dlopen_next,
replacement_paths,
} = &*GLOBAL_STATE;
let dlopen_next = dlopen_next.ok_or(())?;
dlopen_redirect(dlopen_next, replacement_paths, filename, flags)
.or_else(|| dlopen_next(filename, flags).ok())
@ -99,7 +121,7 @@ unsafe extern "C" fn zluda_dlopen_noredirect(
filename: *const c_char,
flags: c_int,
) -> DlopenResult {
let dlopen_next = GLOBALS.0.ok_or(())?;
let dlopen_next = GLOBAL_STATE.dlopen_next.ok_or(())?;
dlopen_next(filename, flags)
}

View File

@ -27,6 +27,7 @@ rustc-hash = "1.1.0"
cglue = "0.3.5"
zstd-safe = { version = "7.2.4", features = ["std"] }
unwrap_or = "1.0.1"
libloading = "0.8"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }

View File

@ -56,8 +56,11 @@ pub(crate) struct CudaDynamicFns {
impl CudaDynamicFns {
pub(crate) unsafe fn load_library(path: &str) -> Option<Self> {
let lib_handle = NonNull::new(os::load_library(path));
lib_handle.map(|lib_handle| CudaDynamicFns {
let lib: libloading::os::unix::Library = zluda_trace_common::dlopen_local_noredirect(path)
.ok()?
.into();
let lib_handle = NonNull::new(lib.into_raw())?;
Some(CudaDynamicFns {
lib_handle,
fn_table: CudaFnTable::default(),
})

View File

@ -1,24 +1,9 @@
use cuda_types::cuda::CUuuid;
use std::ffi::{c_char, c_int, c_void, CStr, CString};
use std::ffi::{c_void, CStr};
use std::mem;
pub(crate) const LIBCUDA_DEFAULT_PATH: &str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";
pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void {
let zluda_dlopen_noredirect =
libc::dlsym(libc::RTLD_DEFAULT, c"zluda_dlopen_noredirect".as_ptr());
let zluda_dlopen_noredirect = mem::transmute::<
_,
Option<unsafe extern "C" fn(*const c_char, c_int) -> *mut c_void>,
>(zluda_dlopen_noredirect);
let dlopen = zluda_dlopen_noredirect.unwrap_or(libc::dlopen);
let libcuda_path = CString::new(libcuda_path).unwrap();
dlopen(
libcuda_path.as_ptr() as *const _,
libc::RTLD_LOCAL | libc::RTLD_NOW,
)
}
/// Looks up the symbol named `func` in the library identified by `handle`.
///
/// Thin wrapper over `libc::dlsym`; the pointer returned by `dlsym` is passed
/// back to the caller unchanged.
///
/// # Safety
/// `handle` is forwarded to `dlsym` as-is, so it must be a value that `dlsym`
/// accepts.
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
    let symbol_name = func.as_ptr();
    libc::dlsym(handle, symbol_name.cast())
}

View File

@ -1,18 +1,16 @@
use cuda_types::cuda::CUuuid;
use std::os::windows::io::AsRawHandle;
use std::{
ffi::{c_void, CStr},
mem, ptr,
sync::LazyLock,
};
use std::os::windows::io::AsRawHandle;
use winapi::{
shared::minwindef::{FARPROC, HMODULE},
um::debugapi::OutputDebugStringA,
um::libloaderapi::{GetProcAddress, LoadLibraryW},
};
use cuda_types::cuda::CUuuid;
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "C:\\Windows\\System32\\nvcuda.dll";
const LOAD_LIBRARY_NO_REDIRECT: &'static [u8] = b"ZludaLoadLibraryW_NoRedirect\0";
const GET_PROC_ADDRESS_NO_REDIRECT: &'static [u8] = b"ZludaGetProcAddress_NoRedirect\0";
@ -73,14 +71,6 @@ impl PlatformLibrary {
}
}
pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void {
let libcuda_path_uf16 = libcuda_path
.encode_utf16()
.chain(std::iter::once(0))
.collect::<Vec<_>>();
(PLATFORM_LIBRARY.LoadLibraryW)(libcuda_path_uf16.as_ptr()) as _
}
/// Resolves the symbol named `func` from `handle` by calling the
/// `GetProcAddress` function pointer stored in `PLATFORM_LIBRARY`, and returns
/// the result as an untyped pointer.
///
/// # Safety
/// NOTE(review): `handle` is forwarded without any checks; presumably it must
/// be a valid module handle - confirm against callers.
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
(PLATFORM_LIBRARY.GetProcAddress)(handle as _, func.as_ptr() as _) as _
}

View File

@ -5,7 +5,7 @@ use cuda_types::{
cusparse::cusparseStatus_tConsts,
};
use dark_api::ByteVecFfi;
use std::{ffi::c_void, num::NonZero, ptr, sync::LazyLock};
use std::{borrow::Cow, ffi::c_void, num::NonZero, ptr, sync::LazyLock};
pub fn get_export_table() -> Option<::dark_api::zluda_trace::ZludaTraceInternal> {
static CU_GET_EXPORT_TABLE: LazyLock<
@ -38,15 +38,17 @@ fn open_driver() -> Result<libloading::Library, libloading::Error> {
os::open_driver()
}
pub fn dlopen_local_noredirect(path: String) -> Result<libloading::Library, libloading::Error> {
unsafe { os::dlopen_local_noredirect(path) }
pub fn dlopen_local_noredirect<'a>(
path: impl Into<Cow<'a, str>>,
) -> Result<libloading::Library, libloading::Error> {
unsafe { os::dlopen_local_noredirect(path.into()) }
}
#[cfg(unix)]
pub(crate) mod os {
use libc::{c_char, c_int};
use libloading::os;
use std::{ffi::c_void, mem};
use std::{borrow::Cow, ffi::c_void, mem};
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
unsafe {
@ -61,9 +63,19 @@ pub(crate) mod os {
}
}
pub unsafe fn dlopen_local_noredirect(
mut path: String,
pub unsafe fn dlopen_local_noredirect<'a>(
path: Cow<'a, str>,
) -> Result<libloading::Library, libloading::Error> {
/// Returns `path` with a trailing NUL character.
///
/// A path that already ends with `'\0'` is handed back untouched (a borrowed
/// input stays borrowed). Otherwise the path is converted to an owned
/// `String`, `'\0'` is appended, and the result is returned as `Cow::Owned`.
fn terminate_with_nul<'a>(path: Cow<'a, str>) -> Cow<'a, str> {
    // Guard clause: nothing to do when the terminator is already present.
    if path.ends_with('\0') {
        return path;
    }
    let mut terminated = path.into_owned();
    terminated.push('\0');
    Cow::Owned(terminated)
}
let zluda_dlopen_noredirect =
unsafe { libc::dlsym(libc::RTLD_DEFAULT, c"zluda_dlopen_noredirect".as_ptr()) };
let zluda_dlopen_noredirect = mem::transmute::<
@ -71,7 +83,7 @@ pub(crate) mod os {
Option<unsafe extern "C" fn(*const c_char, c_int) -> *mut c_void>,
>(zluda_dlopen_noredirect);
let dlopen = zluda_dlopen_noredirect.unwrap_or(libc::dlopen);
path.push('\0');
let path = terminate_with_nul(path);
Ok(libloading::os::unix::Library::from_raw(dlopen(
path.as_ptr().cast(),
os::unix::RTLD_LOCAL | os::unix::RTLD_LAZY,
@ -89,7 +101,7 @@ pub(crate) mod os {
}
pub unsafe fn dlopen_local_noredirect(
path: String,
path: Cow<'a, str>,
) -> Result<libloading::Library, libloading::Error> {
let driver = open_driver()?;
match driver.get::<unsafe extern "C" fn(*const u16) -> isize>(