Add README, fix Windows builds

This commit is contained in:
Andrzej Janik
2025-07-30 16:21:55 -07:00
parent b1db397979
commit dbc7271863
3 changed files with 48 additions and 10 deletions

View File

@ -33,9 +33,7 @@ fn open_driver() -> Result<libloading::Library, libloading::Error> {
os::open_driver()
}
pub fn dlopen_local_noredirect(
path: String,
) -> Result<libloading::Library, libloading::Error> {
pub fn dlopen_local_noredirect(path: String) -> Result<libloading::Library, libloading::Error> {
unsafe { os::dlopen_local_noredirect(path) }
}
@ -82,7 +80,26 @@ pub(crate) mod os {
use libloading::os;
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
unsafe { os::windows::Library::open_already_loaded("nvcuda").map(Into::into) }
os::windows::Library::open_already_loaded("nvcuda").map(Into::into)
}
/// Loads the library at `path`, preferring the `ZludaLoadLibraryW_NoRedirect`
/// export of the already-loaded `nvcuda` module when that export exists.
///
/// When the export cannot be resolved, the call falls back to
/// `libloading::Library::new(path)`.
///
/// # Errors
///
/// Returns an error if `open_driver` fails, if the export returns a zero
/// handle (reported as `LoadLibraryExWUnknown`), or if the fallback load fails.
///
/// # Safety
///
/// This resolves and calls a foreign function and loads a dynamic library;
/// the caller must uphold the same requirements as `libloading::Library::new`.
pub unsafe fn dlopen_local_noredirect(
    path: String,
) -> Result<libloading::Library, libloading::Error> {
    let driver = open_driver()?;
    match driver.get::<unsafe extern "C" fn(*const u16) -> isize>(
        c"ZludaLoadLibraryW_NoRedirect".to_bytes_with_nul(),
    ) {
        Ok(load_library) => {
            // Only a pointer is handed to the callee, so it can find the end of
            // the path solely through a terminating NUL. `encode_utf16` does not
            // emit one, so append it explicitly. The buffer is bound to a local
            // so it clearly outlives the foreign call.
            let wide_path: Vec<u16> = path.encode_utf16().chain(std::iter::once(0)).collect();
            let handle = load_library(wide_path.as_ptr());
            if handle == 0 {
                Err(libloading::Error::LoadLibraryExWUnknown)
            } else {
                Ok(libloading::os::windows::Library::from_raw(handle).into())
            }
        }
        // Export not present: load through the regular path.
        Err(_) => libloading::Library::new(path),
    }
}
}

21
zluda_preload/README.md Normal file
View File

@ -0,0 +1,21 @@
This crate is a last resort Linux-specific solution.
Most of the time we can inject ourselves into a process by having users
set `LD_LIBRARY_PATH`.
Unfortunately, there is software out there which dynamically links to CUDA and
CUDA performance libraries using RPATH. On Linux, the dynamic linker operates
using approximately this algorithm:
* If the name contains `/`, treat it as a (possibly relative) path and just use it
* Otherwise return the first of the following that succeeds:
* Library with this name already loaded into the process
* Try paths in `DT_RPATH` (if `DT_RUNPATH` is not present)
* Try paths in `LD_LIBRARY_PATH`
* Try paths in `DT_RUNPATH`
* Try system paths
In order to defeat `DT_RPATH` this library needs to be preloaded with `LD_PRELOAD`.
On initialization we also preload all the performance libraries. We also hijack
`dlopen`, and on every call to `dlopen` that tries to open a CUDA library we
redirect it to our libraries.
We also expose `zluda_dlopen_noredirect` for the purpose of tracing libraries,
so they can load the real underlying library and not just get redirected to themselves.

View File

@ -40,7 +40,7 @@ static FILES_FOR_REDIRECT: [&'static str; 14] = [
];
static GLOBALS: LazyLock<(
Option<unsafe extern "C" fn(*const c_char, c_int) -> DlopenReturn>,
Option<unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult>,
Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
)> = LazyLock::new(|| {
let dlopen_next = unsafe { mem::transmute(dlsym(RTLD_NEXT, c"dlopen".as_ptr())) };
@ -79,14 +79,14 @@ unsafe fn ctor() {
}
}
type DlopenReturn = Result<NonNull<c_void>, ()>;
type DlopenResult = Result<NonNull<c_void>, ()>;
const _: fn() = || {
let _ = std::mem::transmute::<*mut c_void, DlopenReturn>;
let _ = std::mem::transmute::<*mut c_void, DlopenResult>;
};
#[no_mangle]
unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenReturn {
unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenResult {
let (dlopen_next, replacement_paths) = &*GLOBALS;
let dlopen_next = dlopen_next.ok_or(())?;
dlopen_redirect(dlopen_next, replacement_paths, filename, flags)
@ -98,13 +98,13 @@ unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenRetu
unsafe extern "C" fn zluda_dlopen_noredirect(
filename: *const c_char,
flags: c_int,
) -> DlopenReturn {
) -> DlopenResult {
let dlopen_next = GLOBALS.0.ok_or(())?;
dlopen_next(filename, flags)
}
unsafe fn dlopen_redirect<'a>(
dlopen_next: unsafe extern "C" fn(*const c_char, c_int) -> DlopenReturn,
dlopen_next: unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult,
replacement_paths: &'a Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
input_path: *const c_char,
flags: c_int,