From cd7e2f8e36e2600c98a04c0619e0d20b49f0a96a Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Fri, 1 Aug 2025 03:00:13 +0200 Subject: [PATCH] Force loading ZLUDA through LD_PRELOAD (#447) Certain applications (pytorch) decide that it's a great idea to distribute whole CUDA driver and link to it with DT_RPATH. This igores LD_LIBRARY_PATH. This code defeats that evil mechanism through any means necessary --- Cargo.lock | 45 +++++++++- Cargo.toml | 1 + xtask/src/main.rs | 5 ++ zluda/Cargo.toml | 3 +- zluda/src/impl/memory.rs | 3 +- zluda_preload/Cargo.toml | 20 +++++ zluda_preload/README.md | 21 +++++ zluda_preload/src/lib.rs | 150 ++++++++++++++++++++++++++++++++++ zluda_trace/Cargo.toml | 1 + zluda_trace/src/lib.rs | 4 +- zluda_trace/src/os_unix.rs | 16 ++-- zluda_trace/src/os_win.rs | 24 +++--- zluda_trace_blas/src/lib.rs | 2 +- zluda_trace_blaslt/src/lib.rs | 2 +- zluda_trace_common/Cargo.toml | 3 + zluda_trace_common/src/lib.rs | 81 ++++++++++++++++-- zluda_trace_dnn/src/lib.rs | 2 +- zluda_trace_fft/src/lib.rs | 2 +- zluda_trace_sparse/src/lib.rs | 2 +- 19 files changed, 347 insertions(+), 40 deletions(-) create mode 100644 zluda_preload/Cargo.toml create mode 100644 zluda_preload/README.md create mode 100644 zluda_preload/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 8c663b3..991dd57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -300,6 +300,22 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "ctor" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6" +dependencies = [ + "ctor-proc-macro", + "dtor 0.0.6", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2" + [[package]] name = "cuda_macros" version = "0.0.0" @@ -406,7 +422,16 @@ version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8" dependencies = [ - "dtor-proc-macro", + "dtor-proc-macro 0.0.5", +] + +[[package]] +name = "dtor" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbc66182e62c4e716e2d70f97beceea0de798923f8ca48fb82aa3134dc3cae12" +dependencies = [ + "dtor-proc-macro 0.0.6", ] [[package]] @@ -415,6 +440,12 @@ version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055" +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "dynasm" version = "1.2.3" @@ -1781,7 +1812,7 @@ dependencies = [ "cuda_macros", "cuda_types", "dark_api", - "dtor", + "dtor 0.0.7", "hip_runtime-sys", "lazy_static", "libc", @@ -1874,6 +1905,14 @@ dependencies = [ "cuda_types", ] +[[package]] +name = "zluda_preload" +version = "0.0.0" +dependencies = [ + "ctor", + "unwrap_or", +] + [[package]] name = "zluda_redirect" version = "0.0.0" @@ -1905,6 +1944,7 @@ dependencies = [ "format", "goblin", "libc", + "libloading", "parking_lot", "paste", "ptx", @@ -1956,6 +1996,7 @@ dependencies = [ "cuda_types", "dark_api", "format", + "libc", "libloading", ] diff --git a/Cargo.toml b/Cargo.toml index 831e950..725a7be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "zluda_fft", "zluda_inject", "zluda_ml", + "zluda_preload", "zluda_redirect", "zluda_sparse", ] diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 8d5e0be..93a9ad7 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -155,6 +155,8 @@ struct Metadata { #[derive(Deserialize)] #[serde(deny_unknown_fields)] struct ZludaMetadata { + #[serde(default)] + linux_only: bool, #[serde(default)] windows_only: bool, #[serde(default)] @@ -192,6 +194,9 @@ fn compile(b: Build) -> (PathBuf, String, Vec) { .into_iter() .filter_map(Project::try_new) .filter(|project| { + if project.meta.linux_only && cfg!(windows) { + return false; + } if project.meta.windows_only && cfg!(not(windows)) { return false; } diff --git a/zluda/Cargo.toml b/zluda/Cargo.toml index 664401c..5b0a1a9 100644 --- a/zluda/Cargo.toml +++ b/zluda/Cargo.toml @@ -22,7 +22,6 @@ lz4-sys = "1.9" tempfile = "3" paste = "1.0" rustc-hash = "1.1" -dtor = "0.0.6" zluda_common = { path = "../zluda_common" } [target.'cfg(windows)'.dependencies] @@ -30,7 +29,7 @@ winapi = { version = "0.3", features = ["heapapi", "std"] } [target.'cfg(not(windows))'.dependencies] libc = "0.2" -dtor = "0.0.6" +dtor = "0.0.7" [package.metadata.zluda] linux_symlinks = [ diff --git a/zluda/src/impl/memory.rs b/zluda/src/impl/memory.rs index b10ba75..19244b8 100644 --- a/zluda/src/impl/memory.rs +++ b/zluda/src/impl/memory.rs @@ -1,5 +1,4 @@ use hip_runtime_sys::*; -use std::mem; pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t { unsafe { hipMalloc(dptr.cast(), bytesize) }?; @@ -36,7 +35,7 @@ pub(crate) fn get_address_range_v2( } pub(crate) fn set_d32_v2(dst: hipDeviceptr_t, ui: ::core::ffi::c_uint, n: usize) -> hipError_t { - unsafe { hipMemsetD32(dst, mem::transmute(ui), n) } + unsafe { hipMemsetD32(dst, ui as std::ffi::c_int, n) } } pub(crate) fn set_d8_v2(dst: hipDeviceptr_t, value: ::core::ffi::c_uchar, n: usize) -> hipError_t { diff --git a/zluda_preload/Cargo.toml b/zluda_preload/Cargo.toml new file mode 100644 index 0000000..ea9fa71 --- /dev/null +++ b/zluda_preload/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "zluda_preload" +version = "0.0.0" +authors = ["Andrzej Janik "] +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +ctor = "0.4.3" +unwrap_or = "1.0.1" + +[package.metadata.zluda] +linux_only = true +linux_symlinks = [ + "zluda_preload", + "trace/zluda_preload", + "trace_nvidia/zluda_preload", +] diff --git a/zluda_preload/README.md b/zluda_preload/README.md new file mode 100644 index 0000000..d5b28a4 --- /dev/null +++ b/zluda_preload/README.md @@ -0,0 +1,21 @@ +This crate is a last resort Linux-specific solution. +Most of the time we can inject ourselves into a process by having users +set `LD_LIBRARY_PATH`. +Unfortunately, there is software out there which dynamically links to CUDA and +CUDA performance libraries using RPATH. On Linux, dynamic linker operates +using approximately this algorithm: +* If path contains `/` treat the name as a (possibly relative) path and just use it +* Otherwise return the first that succeeds: + * Library with this name already loaded into the process + * Try paths in `DT_RPATH` (if `DT_RUNPATH` is not present) + * Try paths in `LD_LIBRARY_PATH` + * Try paths in `DT_RUNPATH` + * Try system paths + +In order to defeat `DT_RPATH` this library needs to be preloaded with `LD_PRELOAD`. +On initialization we also preload all the performance libraries. We also hijack +`dlopen` and on every call to `dlopen` that tries to open a CUDA library we +redirect it to our libraries + +We also expose `zluda_dlopen_noredirect` for the purpose of tracing libraries +so they can load real underlying library and not just get redirected to themselves diff --git a/zluda_preload/src/lib.rs b/zluda_preload/src/lib.rs new file mode 100644 index 0000000..5e57be2 --- /dev/null +++ b/zluda_preload/src/lib.rs @@ -0,0 +1,150 @@ +use std::{ + ffi::{c_char, c_int, c_void, CStr}, + mem, + path::PathBuf, + ptr::{self, NonNull}, + sync::LazyLock, +}; +use unwrap_or::unwrap_some_or; + +// Definition takes from `libc` crate: +// https://github.com/rust-lang/libc/blob/cf82fdf3f22ccfa98ba120efc50d5f39ab2d52ff/src/unix/linux_like/linux/mod.rs#L2682 +const RTLD_NEXT: *mut c_void = -1isize as _; + +unsafe extern "C" { + fn dlsym(handle: *mut c_void, symbol: *const c_char) -> *mut c_void; + fn dladdr(addr: *const c_void, info: *mut DLInfo) -> c_int; +} + +#[repr(C)] +struct DLInfo { + dli_fname: *const c_char, + dli_fbase: *mut c_void, + dli_sname: *const c_char, + dli_saddr: *mut c_void, +} + +static FILES_FOR_REDIRECT: [&'static str; 14] = [ + "libcublas.so", + "libcublas.so.12", + "libcublasLt.so", + "libcublasLt.so.12", + "libcuda.so", + "libcuda.so.1", + "libcudnn.so", + "libcudnn.so.9", + "libcufft.so", + "libcufft.so.11", + "libcusparse.so", + "libcusparse.so.12", + "libnvidia-ml.so", + "libnvidia-ml.so.1", +]; + +// Global state, caching some computations that would be otherwise repeated on every `dlopen` +struct GlobalState { + /// The original `dlopen` implementation from libdl. + dlopen_next: Option DlopenResult>, + /// The full paths of the file names from `FILES_FOR_REDIRECT` that will be used for redirection + replacement_paths: Option<[Vec; FILES_FOR_REDIRECT.len()]>, +} + +static GLOBAL_STATE: LazyLock = LazyLock::new(|| { + let dlopen_next = unsafe { mem::transmute(dlsym(RTLD_NEXT, c"dlopen".as_ptr())) }; + let mut self_dlinfo = unsafe { mem::zeroed::() }; + let replacement_paths = if unsafe { dladdr(dlopen as _, &mut self_dlinfo) } != 0 { + unsafe { CStr::from_ptr(self_dlinfo.dli_fname) } + .to_str() + .ok() + .and_then(|path| { + let mut pathbuf = PathBuf::from(path); + if !pathbuf.pop() { + return None; + } + Some(FILES_FOR_REDIRECT.map(|file| { + let mut buffer = pathbuf.join(file).into_os_string().into_encoded_bytes(); + buffer.push(0); + buffer + })) + }) + } else { + None + }; + GlobalState { + dlopen_next, + replacement_paths, + } +}); + +pub const RTLD_GLOBAL: c_int = 0x100; +pub const RTLD_LAZY: c_int = 1; + +#[ctor::ctor] +unsafe fn ctor() { + let GlobalState { + dlopen_next, + replacement_paths, + } = &*GLOBAL_STATE; + let dlopen_next = unwrap_some_or!(dlopen_next, return); + let replacement_paths = unwrap_some_or!(replacement_paths, return); + // We preload the paths to the files we want to redirect, because + // * We don't control dynamic linking when loading dependencies. We hijack + // dlopen, but that only works if the dependency has been explicitly + // loaded with dlopen. It does not intercept the loading of the dependencies + // * The first step that dynamic linker does is check if the file is already + // loaded + for replacement in replacement_paths.into_iter() { + dlopen_next(replacement.as_ptr().cast(), RTLD_GLOBAL | RTLD_LAZY).ok(); + } +} + +type DlopenResult = Result, ()>; + +const _: fn() = || { + let _ = std::mem::transmute::<*mut c_void, DlopenResult>; +}; + +#[no_mangle] +unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenResult { + let GlobalState { + dlopen_next, + replacement_paths, + } = &*GLOBAL_STATE; + let dlopen_next = dlopen_next.ok_or(())?; + dlopen_redirect(dlopen_next, replacement_paths, filename, flags) + .or_else(|| dlopen_next(filename, flags).ok()) + .ok_or(()) +} + +#[no_mangle] +unsafe extern "C" fn zluda_dlopen_noredirect( + filename: *const c_char, + flags: c_int, +) -> DlopenResult { + let dlopen_next = GLOBAL_STATE.dlopen_next.ok_or(())?; + dlopen_next(filename, flags) +} + +unsafe fn dlopen_redirect<'a>( + dlopen_next: unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult, + replacement_paths: &'a Option<[Vec; FILES_FOR_REDIRECT.len()]>, + input_path: *const c_char, + flags: c_int, +) -> Option> { + if input_path == ptr::null() { + return None; + } + let input_path = CStr::from_ptr(input_path).to_str().ok()?; + let replacement_paths = replacement_paths.as_ref()?; + let replacement_path = FILES_FOR_REDIRECT + .into_iter() + .zip(replacement_paths.into_iter()) + .find_map(|(file, path)| { + if input_path.ends_with(file) { + Some(path) + } else { + None + } + })?; + unsafe { dlopen_next(replacement_path.as_ptr() as _, flags) }.ok() +} diff --git a/zluda_trace/Cargo.toml b/zluda_trace/Cargo.toml index 510733e..b679eaf 100644 --- a/zluda_trace/Cargo.toml +++ b/zluda_trace/Cargo.toml @@ -27,6 +27,7 @@ rustc-hash = "1.1.0" cglue = "0.3.5" zstd-safe = { version = "7.2.4", features = ["std"] } unwrap_or = "1.0.1" +libloading = "0.8" [target.'cfg(windows)'.dependencies] winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] } diff --git a/zluda_trace/src/lib.rs b/zluda_trace/src/lib.rs index b512d47..964c09d 100644 --- a/zluda_trace/src/lib.rs +++ b/zluda_trace/src/lib.rs @@ -56,8 +56,8 @@ pub(crate) struct CudaDynamicFns { impl CudaDynamicFns { pub(crate) unsafe fn load_library(path: &str) -> Option { - let lib_handle = NonNull::new(os::load_library(path)); - lib_handle.map(|lib_handle| CudaDynamicFns { + let lib_handle = os::dlopen_local_noredirect(path).ok()?; + Some(CudaDynamicFns { lib_handle, fn_table: CudaFnTable::default(), }) diff --git a/zluda_trace/src/os_unix.rs b/zluda_trace/src/os_unix.rs index 8e0a510..43a33ee 100644 --- a/zluda_trace/src/os_unix.rs +++ b/zluda_trace/src/os_unix.rs @@ -1,15 +1,17 @@ use cuda_types::cuda::CUuuid; -use std::ffi::{c_void, CStr, CString}; +use std::borrow::Cow; +use std::ffi::{c_void, CStr}; use std::mem; +use std::ptr::NonNull; pub(crate) const LIBCUDA_DEFAULT_PATH: &str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1"; -pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void { - let libcuda_path = CString::new(libcuda_path).unwrap(); - libc::dlopen( - libcuda_path.as_ptr() as *const _, - libc::RTLD_LOCAL | libc::RTLD_NOW, - ) +pub fn dlopen_local_noredirect<'a>( + path: impl Into>, +) -> Result, libloading::Error> { + let lib: libloading::os::unix::Library = + zluda_trace_common::dlopen_local_noredirect(path)?.into(); + NonNull::new(lib.into_raw()).ok_or(libloading::Error::DlOpenUnknown) } pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void { diff --git a/zluda_trace/src/os_win.rs b/zluda_trace/src/os_win.rs index 616b237..59beee8 100644 --- a/zluda_trace/src/os_win.rs +++ b/zluda_trace/src/os_win.rs @@ -1,22 +1,30 @@ +use cuda_types::cuda::CUuuid; +use std::borrow::Cow; +use std::os::windows::io::AsRawHandle; +use std::ptr::NonNull; use std::{ ffi::{c_void, CStr}, mem, ptr, sync::LazyLock, }; - -use std::os::windows::io::AsRawHandle; use winapi::{ shared::minwindef::{FARPROC, HMODULE}, um::debugapi::OutputDebugStringA, um::libloaderapi::{GetProcAddress, LoadLibraryW}, }; -use cuda_types::cuda::CUuuid; - pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "C:\\Windows\\System32\\nvcuda.dll"; const LOAD_LIBRARY_NO_REDIRECT: &'static [u8] = b"ZludaLoadLibraryW_NoRedirect\0"; const GET_PROC_ADDRESS_NO_REDIRECT: &'static [u8] = b"ZludaGetProcAddress_NoRedirect\0"; +pub fn dlopen_local_noredirect<'a>( + path: impl Into>, +) -> Result, libloading::Error> { + let lib: libloading::os::windows::Library = + zluda_trace_common::dlopen_local_noredirect(path)?.into(); + NonNull::new(lib.into_raw() as *mut _).ok_or(libloading::Error::DlOpenUnknown) +} + static PLATFORM_LIBRARY: LazyLock = LazyLock::new(|| unsafe { PlatformLibrary::new() }); @@ -73,14 +81,6 @@ impl PlatformLibrary { } } -pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void { - let libcuda_path_uf16 = libcuda_path - .encode_utf16() - .chain(std::iter::once(0)) - .collect::>(); - (PLATFORM_LIBRARY.LoadLibraryW)(libcuda_path_uf16.as_ptr()) as _ -} - pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void { (PLATFORM_LIBRARY.GetProcAddress)(handle as _, func.as_ptr() as _) as _ } diff --git a/zluda_trace_blas/src/lib.rs b/zluda_trace_blas/src/lib.rs index 7ee9211..b1bc28f 100644 --- a/zluda_trace_blas/src/lib.rs +++ b/zluda_trace_blas/src/lib.rs @@ -7,7 +7,7 @@ fn get_library() -> Option { let cuda_lib = std::env::var("ZLUDA_BLAS_LIB") .ok() .unwrap_or_else(|| "/usr/local/cuda/lib64/libcublas.so".to_string()); - unsafe { Library::new(cuda_lib) }.ok() + zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok() } macro_rules! unimplemented { diff --git a/zluda_trace_blaslt/src/lib.rs b/zluda_trace_blaslt/src/lib.rs index 3c059e5..5202a79 100644 --- a/zluda_trace_blaslt/src/lib.rs +++ b/zluda_trace_blaslt/src/lib.rs @@ -7,7 +7,7 @@ fn get_library() -> Option { let cuda_lib = std::env::var("ZLUDA_BLASLT_LIB") .ok() .unwrap_or_else(|| "/usr/local/cuda/lib64/libcublasLt.so".to_string()); - unsafe { Library::new(cuda_lib) }.ok() + zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok() } macro_rules! unimplemented { diff --git a/zluda_trace_common/Cargo.toml b/zluda_trace_common/Cargo.toml index 6198186..3bf40b5 100644 --- a/zluda_trace_common/Cargo.toml +++ b/zluda_trace_common/Cargo.toml @@ -11,3 +11,6 @@ cuda_types = { path = "../cuda_types" } dark_api = { path = "../dark_api" } format = { path = "../format" } cglue = "0.3.5" + +[target.'cfg(not(windows))'.dependencies] +libc = "0.2" diff --git a/zluda_trace_common/src/lib.rs b/zluda_trace_common/src/lib.rs index 289dc7f..7c81c19 100644 --- a/zluda_trace_common/src/lib.rs +++ b/zluda_trace_common/src/lib.rs @@ -5,7 +5,7 @@ use cuda_types::{ cusparse::cusparseStatus_tConsts, }; use dark_api::ByteVecFfi; -use std::{ffi::c_void, num::NonZero, ptr, sync::LazyLock}; +use std::{borrow::Cow, ffi::c_void, num::NonZero, ptr, sync::LazyLock}; pub fn get_export_table() -> Option<::dark_api::zluda_trace::ZludaTraceInternal> { static CU_GET_EXPORT_TABLE: LazyLock< @@ -38,30 +38,95 @@ fn open_driver() -> Result { os::open_driver() } +pub fn dlopen_local_noredirect<'a>( + path: impl Into>, +) -> Result { + unsafe { os::dlopen_local_noredirect(path.into()) } +} + #[cfg(unix)] pub(crate) mod os { + use libc::{c_char, c_int}; use libloading::os; - - const RTLD_NOLOAD: i32 = 0x4; + use std::{borrow::Cow, ffi::c_void, mem}; pub fn open_driver() -> Result { unsafe { - os::unix::Library::open(Some("libcuda.so.1"), RTLD_NOLOAD | os::unix::RTLD_LAZY) - .or_else(|_| { - os::unix::Library::open(Some("libcuda.so"), RTLD_NOLOAD | os::unix::RTLD_LAZY) - }) - .map(Into::into) + os::unix::Library::open( + Some("libcuda.so.1"), + libc::RTLD_NOLOAD | os::unix::RTLD_LAZY, + ) + .or_else(|_| { + os::unix::Library::open(Some("libcuda.so"), libc::RTLD_NOLOAD | os::unix::RTLD_LAZY) + }) + .map(Into::into) } } + + pub unsafe fn dlopen_local_noredirect<'a>( + path: Cow<'a, str>, + ) -> Result { + fn terminate_with_nul<'a>(path: Cow<'a, str>) -> Cow<'a, str> { + let path = if !path.ends_with('\0') { + let mut path = path.into_owned(); + path.push('\0'); + Cow::Owned(path) + } else { + path + }; + path + } + let zluda_dlopen_noredirect = + unsafe { libc::dlsym(libc::RTLD_DEFAULT, c"zluda_dlopen_noredirect".as_ptr()) }; + let zluda_dlopen_noredirect = mem::transmute::< + _, + Option *mut c_void>, + >(zluda_dlopen_noredirect); + let dlopen = zluda_dlopen_noredirect.unwrap_or(libc::dlopen); + let path = terminate_with_nul(path); + Ok(libloading::os::unix::Library::from_raw(dlopen( + path.as_ptr().cast(), + os::unix::RTLD_LOCAL | os::unix::RTLD_LAZY, + )) + .into()) + } } #[cfg(windows)] pub(crate) mod os { use libloading::os; + use std::borrow::Cow; pub fn open_driver() -> Result { os::windows::Library::open_already_loaded("nvcuda").map(Into::into) } + + pub unsafe fn dlopen_local_noredirect<'a>( + path: Cow<'a, str>, + ) -> Result { + fn terminate_with_nul(mut path: Vec) -> Vec { + if path.last().copied() == Some(0) { + path.push(0); + } + path + } + let driver = open_driver()?; + match driver.get:: isize>( + c"ZludaLoadLibraryW_NoRedirect".to_bytes_with_nul(), + ) { + Ok(load_library) => { + let symbol = load_library( + terminate_with_nul(path.encode_utf16().collect::>()).as_ptr(), + ); + if symbol == 0 { + Err(libloading::Error::LoadLibraryExWUnknown) + } else { + Ok(libloading::os::windows::Library::from_raw(symbol).into()) + } + } + Err(_) => libloading::Library::new(&*path), + } + } } pub trait ReprUsize { diff --git a/zluda_trace_dnn/src/lib.rs b/zluda_trace_dnn/src/lib.rs index 430633b..e5252f5 100644 --- a/zluda_trace_dnn/src/lib.rs +++ b/zluda_trace_dnn/src/lib.rs @@ -7,7 +7,7 @@ fn get_library() -> Option { let cuda_lib = std::env::var("ZLUDA_DNN_LIB") .ok() .unwrap_or_else(|| "/usr/lib/x86_64-linux-gnu/libcudnn.so.9".to_string()); - unsafe { Library::new(cuda_lib) }.ok() + zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok() } macro_rules! unimplemented { diff --git a/zluda_trace_fft/src/lib.rs b/zluda_trace_fft/src/lib.rs index 98864d4..dbedc29 100644 --- a/zluda_trace_fft/src/lib.rs +++ b/zluda_trace_fft/src/lib.rs @@ -7,7 +7,7 @@ fn get_library() -> Option { let cuda_lib = std::env::var("ZLUDA_FFT_LIB") .ok() .unwrap_or_else(|| "/usr/local/cuda/lib64/libcufft.so".to_string()); - unsafe { Library::new(cuda_lib) }.ok() + zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok() } macro_rules! unimplemented { diff --git a/zluda_trace_sparse/src/lib.rs b/zluda_trace_sparse/src/lib.rs index 715d0c5..44a286d 100644 --- a/zluda_trace_sparse/src/lib.rs +++ b/zluda_trace_sparse/src/lib.rs @@ -7,7 +7,7 @@ fn get_library() -> Option { let cuda_lib = std::env::var("ZLUDA_SPARSE_LIB") .ok() .unwrap_or_else(|| "/usr/local/cuda/lib64/libcusparse.so".to_string()); - unsafe { Library::new(cuda_lib) }.ok() + zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok() } macro_rules! unimplemented {