mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 14:57:43 +03:00
Force loading ZLUDA through LD_PRELOAD (#447)
Certain applications (pytorch) decide that it's a great idea to distribute whole CUDA driver and link to it with DT_RPATH. This igores LD_LIBRARY_PATH. This code defeats that evil mechanism through any means necessary
This commit is contained in:
45
Cargo.lock
generated
45
Cargo.lock
generated
@ -300,6 +300,22 @@ dependencies = [
|
|||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ctor"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6"
|
||||||
|
dependencies = [
|
||||||
|
"ctor-proc-macro",
|
||||||
|
"dtor 0.0.6",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ctor-proc-macro"
|
||||||
|
version = "0.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cuda_macros"
|
name = "cuda_macros"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
@ -406,7 +422,16 @@ version = "0.0.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8"
|
checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"dtor-proc-macro",
|
"dtor-proc-macro 0.0.5",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dtor"
|
||||||
|
version = "0.0.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bbc66182e62c4e716e2d70f97beceea0de798923f8ca48fb82aa3134dc3cae12"
|
||||||
|
dependencies = [
|
||||||
|
"dtor-proc-macro 0.0.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -415,6 +440,12 @@ version = "0.0.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055"
|
checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dtor-proc-macro"
|
||||||
|
version = "0.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dynasm"
|
name = "dynasm"
|
||||||
version = "1.2.3"
|
version = "1.2.3"
|
||||||
@ -1781,7 +1812,7 @@ dependencies = [
|
|||||||
"cuda_macros",
|
"cuda_macros",
|
||||||
"cuda_types",
|
"cuda_types",
|
||||||
"dark_api",
|
"dark_api",
|
||||||
"dtor",
|
"dtor 0.0.7",
|
||||||
"hip_runtime-sys",
|
"hip_runtime-sys",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
@ -1874,6 +1905,14 @@ dependencies = [
|
|||||||
"cuda_types",
|
"cuda_types",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zluda_preload"
|
||||||
|
version = "0.0.0"
|
||||||
|
dependencies = [
|
||||||
|
"ctor",
|
||||||
|
"unwrap_or",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zluda_redirect"
|
name = "zluda_redirect"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
@ -1905,6 +1944,7 @@ dependencies = [
|
|||||||
"format",
|
"format",
|
||||||
"goblin",
|
"goblin",
|
||||||
"libc",
|
"libc",
|
||||||
|
"libloading",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
"paste",
|
"paste",
|
||||||
"ptx",
|
"ptx",
|
||||||
@ -1956,6 +1996,7 @@ dependencies = [
|
|||||||
"cuda_types",
|
"cuda_types",
|
||||||
"dark_api",
|
"dark_api",
|
||||||
"format",
|
"format",
|
||||||
|
"libc",
|
||||||
"libloading",
|
"libloading",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ members = [
|
|||||||
"zluda_fft",
|
"zluda_fft",
|
||||||
"zluda_inject",
|
"zluda_inject",
|
||||||
"zluda_ml",
|
"zluda_ml",
|
||||||
|
"zluda_preload",
|
||||||
"zluda_redirect",
|
"zluda_redirect",
|
||||||
"zluda_sparse",
|
"zluda_sparse",
|
||||||
]
|
]
|
||||||
|
@ -155,6 +155,8 @@ struct Metadata {
|
|||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
#[serde(deny_unknown_fields)]
|
#[serde(deny_unknown_fields)]
|
||||||
struct ZludaMetadata {
|
struct ZludaMetadata {
|
||||||
|
#[serde(default)]
|
||||||
|
linux_only: bool,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
windows_only: bool,
|
windows_only: bool,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@ -192,6 +194,9 @@ fn compile(b: Build) -> (PathBuf, String, Vec<Project>) {
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(Project::try_new)
|
.filter_map(Project::try_new)
|
||||||
.filter(|project| {
|
.filter(|project| {
|
||||||
|
if project.meta.linux_only && cfg!(windows) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if project.meta.windows_only && cfg!(not(windows)) {
|
if project.meta.windows_only && cfg!(not(windows)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,6 @@ lz4-sys = "1.9"
|
|||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
paste = "1.0"
|
paste = "1.0"
|
||||||
rustc-hash = "1.1"
|
rustc-hash = "1.1"
|
||||||
dtor = "0.0.6"
|
|
||||||
zluda_common = { path = "../zluda_common" }
|
zluda_common = { path = "../zluda_common" }
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
@ -30,7 +29,7 @@ winapi = { version = "0.3", features = ["heapapi", "std"] }
|
|||||||
|
|
||||||
[target.'cfg(not(windows))'.dependencies]
|
[target.'cfg(not(windows))'.dependencies]
|
||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
dtor = "0.0.6"
|
dtor = "0.0.7"
|
||||||
|
|
||||||
[package.metadata.zluda]
|
[package.metadata.zluda]
|
||||||
linux_symlinks = [
|
linux_symlinks = [
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use std::mem;
|
|
||||||
|
|
||||||
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
||||||
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
||||||
@ -36,7 +35,7 @@ pub(crate) fn get_address_range_v2(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_d32_v2(dst: hipDeviceptr_t, ui: ::core::ffi::c_uint, n: usize) -> hipError_t {
|
pub(crate) fn set_d32_v2(dst: hipDeviceptr_t, ui: ::core::ffi::c_uint, n: usize) -> hipError_t {
|
||||||
unsafe { hipMemsetD32(dst, mem::transmute(ui), n) }
|
unsafe { hipMemsetD32(dst, ui as std::ffi::c_int, n) }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_d8_v2(dst: hipDeviceptr_t, value: ::core::ffi::c_uchar, n: usize) -> hipError_t {
|
pub(crate) fn set_d8_v2(dst: hipDeviceptr_t, value: ::core::ffi::c_uchar, n: usize) -> hipError_t {
|
||||||
|
20
zluda_preload/Cargo.toml
Normal file
20
zluda_preload/Cargo.toml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[package]
|
||||||
|
name = "zluda_preload"
|
||||||
|
version = "0.0.0"
|
||||||
|
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
ctor = "0.4.3"
|
||||||
|
unwrap_or = "1.0.1"
|
||||||
|
|
||||||
|
[package.metadata.zluda]
|
||||||
|
linux_only = true
|
||||||
|
linux_symlinks = [
|
||||||
|
"zluda_preload",
|
||||||
|
"trace/zluda_preload",
|
||||||
|
"trace_nvidia/zluda_preload",
|
||||||
|
]
|
21
zluda_preload/README.md
Normal file
21
zluda_preload/README.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
This crate is a last resort Linux-specific solution.
|
||||||
|
Most of the time we can inject ourselves into a process by having users
|
||||||
|
set `LD_LIBRARY_PATH`.
|
||||||
|
Unfortunately, there is software out there which dynamically links to CUDA and
|
||||||
|
CUDA performance libraries using RPATH. On Linux, dynamic linker operates
|
||||||
|
using approximately this algorithm:
|
||||||
|
* If path contains `/` treat the name as a (possibly relative) path and just use it
|
||||||
|
* Otherwise return the first that succeeds:
|
||||||
|
* Library with this name already loaded into the process
|
||||||
|
* Try paths in `DT_RPATH` (if `DT_RUNPATH` is not present)
|
||||||
|
* Try paths in `LD_LIBRARY_PATH`
|
||||||
|
* Try paths in `DT_RUNPATH`
|
||||||
|
* Try system paths
|
||||||
|
|
||||||
|
In order to defeat `DT_RPATH` this library needs to be preloaded with `LD_PRELOAD`.
|
||||||
|
On initialization we also preload all the performance libraries. We also hijack
|
||||||
|
`dlopen` and on every call to `dlopen` that tries to open a CUDA library we
|
||||||
|
redirect it to our libraries
|
||||||
|
|
||||||
|
We also expose `zluda_dlopen_noredirect` for the purpose of tracing libraries
|
||||||
|
so they can load real underlying library and not just get redirected to themselves
|
150
zluda_preload/src/lib.rs
Normal file
150
zluda_preload/src/lib.rs
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
use std::{
|
||||||
|
ffi::{c_char, c_int, c_void, CStr},
|
||||||
|
mem,
|
||||||
|
path::PathBuf,
|
||||||
|
ptr::{self, NonNull},
|
||||||
|
sync::LazyLock,
|
||||||
|
};
|
||||||
|
use unwrap_or::unwrap_some_or;
|
||||||
|
|
||||||
|
// Definition takes from `libc` crate:
|
||||||
|
// https://github.com/rust-lang/libc/blob/cf82fdf3f22ccfa98ba120efc50d5f39ab2d52ff/src/unix/linux_like/linux/mod.rs#L2682
|
||||||
|
const RTLD_NEXT: *mut c_void = -1isize as _;
|
||||||
|
|
||||||
|
unsafe extern "C" {
|
||||||
|
fn dlsym(handle: *mut c_void, symbol: *const c_char) -> *mut c_void;
|
||||||
|
fn dladdr(addr: *const c_void, info: *mut DLInfo) -> c_int;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
struct DLInfo {
|
||||||
|
dli_fname: *const c_char,
|
||||||
|
dli_fbase: *mut c_void,
|
||||||
|
dli_sname: *const c_char,
|
||||||
|
dli_saddr: *mut c_void,
|
||||||
|
}
|
||||||
|
|
||||||
|
static FILES_FOR_REDIRECT: [&'static str; 14] = [
|
||||||
|
"libcublas.so",
|
||||||
|
"libcublas.so.12",
|
||||||
|
"libcublasLt.so",
|
||||||
|
"libcublasLt.so.12",
|
||||||
|
"libcuda.so",
|
||||||
|
"libcuda.so.1",
|
||||||
|
"libcudnn.so",
|
||||||
|
"libcudnn.so.9",
|
||||||
|
"libcufft.so",
|
||||||
|
"libcufft.so.11",
|
||||||
|
"libcusparse.so",
|
||||||
|
"libcusparse.so.12",
|
||||||
|
"libnvidia-ml.so",
|
||||||
|
"libnvidia-ml.so.1",
|
||||||
|
];
|
||||||
|
|
||||||
|
// Global state, caching some computations that would be otherwise repeated on every `dlopen`
|
||||||
|
struct GlobalState {
|
||||||
|
/// The original `dlopen` implementation from libdl.
|
||||||
|
dlopen_next: Option<unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult>,
|
||||||
|
/// The full paths of the file names from `FILES_FOR_REDIRECT` that will be used for redirection
|
||||||
|
replacement_paths: Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
static GLOBAL_STATE: LazyLock<GlobalState> = LazyLock::new(|| {
|
||||||
|
let dlopen_next = unsafe { mem::transmute(dlsym(RTLD_NEXT, c"dlopen".as_ptr())) };
|
||||||
|
let mut self_dlinfo = unsafe { mem::zeroed::<DLInfo>() };
|
||||||
|
let replacement_paths = if unsafe { dladdr(dlopen as _, &mut self_dlinfo) } != 0 {
|
||||||
|
unsafe { CStr::from_ptr(self_dlinfo.dli_fname) }
|
||||||
|
.to_str()
|
||||||
|
.ok()
|
||||||
|
.and_then(|path| {
|
||||||
|
let mut pathbuf = PathBuf::from(path);
|
||||||
|
if !pathbuf.pop() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(FILES_FOR_REDIRECT.map(|file| {
|
||||||
|
let mut buffer = pathbuf.join(file).into_os_string().into_encoded_bytes();
|
||||||
|
buffer.push(0);
|
||||||
|
buffer
|
||||||
|
}))
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
GlobalState {
|
||||||
|
dlopen_next,
|
||||||
|
replacement_paths,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
pub const RTLD_GLOBAL: c_int = 0x100;
|
||||||
|
pub const RTLD_LAZY: c_int = 1;
|
||||||
|
|
||||||
|
#[ctor::ctor]
|
||||||
|
unsafe fn ctor() {
|
||||||
|
let GlobalState {
|
||||||
|
dlopen_next,
|
||||||
|
replacement_paths,
|
||||||
|
} = &*GLOBAL_STATE;
|
||||||
|
let dlopen_next = unwrap_some_or!(dlopen_next, return);
|
||||||
|
let replacement_paths = unwrap_some_or!(replacement_paths, return);
|
||||||
|
// We preload the paths to the files we want to redirect, because
|
||||||
|
// * We don't control dynamic linking when loading dependencies. We hijack
|
||||||
|
// dlopen, but that only works if the dependency has been explicitly
|
||||||
|
// loaded with dlopen. It does not intercept the loading of the dependencies
|
||||||
|
// * The first step that dynamic linker does is check if the file is already
|
||||||
|
// loaded
|
||||||
|
for replacement in replacement_paths.into_iter() {
|
||||||
|
dlopen_next(replacement.as_ptr().cast(), RTLD_GLOBAL | RTLD_LAZY).ok();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type DlopenResult = Result<NonNull<c_void>, ()>;
|
||||||
|
|
||||||
|
const _: fn() = || {
|
||||||
|
let _ = std::mem::transmute::<*mut c_void, DlopenResult>;
|
||||||
|
};
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
unsafe extern "C" fn dlopen(filename: *const c_char, flags: c_int) -> DlopenResult {
|
||||||
|
let GlobalState {
|
||||||
|
dlopen_next,
|
||||||
|
replacement_paths,
|
||||||
|
} = &*GLOBAL_STATE;
|
||||||
|
let dlopen_next = dlopen_next.ok_or(())?;
|
||||||
|
dlopen_redirect(dlopen_next, replacement_paths, filename, flags)
|
||||||
|
.or_else(|| dlopen_next(filename, flags).ok())
|
||||||
|
.ok_or(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[no_mangle]
|
||||||
|
unsafe extern "C" fn zluda_dlopen_noredirect(
|
||||||
|
filename: *const c_char,
|
||||||
|
flags: c_int,
|
||||||
|
) -> DlopenResult {
|
||||||
|
let dlopen_next = GLOBAL_STATE.dlopen_next.ok_or(())?;
|
||||||
|
dlopen_next(filename, flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn dlopen_redirect<'a>(
|
||||||
|
dlopen_next: unsafe extern "C" fn(*const c_char, c_int) -> DlopenResult,
|
||||||
|
replacement_paths: &'a Option<[Vec<u8>; FILES_FOR_REDIRECT.len()]>,
|
||||||
|
input_path: *const c_char,
|
||||||
|
flags: c_int,
|
||||||
|
) -> Option<NonNull<c_void>> {
|
||||||
|
if input_path == ptr::null() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let input_path = CStr::from_ptr(input_path).to_str().ok()?;
|
||||||
|
let replacement_paths = replacement_paths.as_ref()?;
|
||||||
|
let replacement_path = FILES_FOR_REDIRECT
|
||||||
|
.into_iter()
|
||||||
|
.zip(replacement_paths.into_iter())
|
||||||
|
.find_map(|(file, path)| {
|
||||||
|
if input_path.ends_with(file) {
|
||||||
|
Some(path)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
unsafe { dlopen_next(replacement_path.as_ptr() as _, flags) }.ok()
|
||||||
|
}
|
@ -27,6 +27,7 @@ rustc-hash = "1.1.0"
|
|||||||
cglue = "0.3.5"
|
cglue = "0.3.5"
|
||||||
zstd-safe = { version = "7.2.4", features = ["std"] }
|
zstd-safe = { version = "7.2.4", features = ["std"] }
|
||||||
unwrap_or = "1.0.1"
|
unwrap_or = "1.0.1"
|
||||||
|
libloading = "0.8"
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
|
winapi = { version = "0.3", features = ["libloaderapi", "debugapi", "std"] }
|
||||||
|
@ -56,8 +56,8 @@ pub(crate) struct CudaDynamicFns {
|
|||||||
|
|
||||||
impl CudaDynamicFns {
|
impl CudaDynamicFns {
|
||||||
pub(crate) unsafe fn load_library(path: &str) -> Option<Self> {
|
pub(crate) unsafe fn load_library(path: &str) -> Option<Self> {
|
||||||
let lib_handle = NonNull::new(os::load_library(path));
|
let lib_handle = os::dlopen_local_noredirect(path).ok()?;
|
||||||
lib_handle.map(|lib_handle| CudaDynamicFns {
|
Some(CudaDynamicFns {
|
||||||
lib_handle,
|
lib_handle,
|
||||||
fn_table: CudaFnTable::default(),
|
fn_table: CudaFnTable::default(),
|
||||||
})
|
})
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
use cuda_types::cuda::CUuuid;
|
use cuda_types::cuda::CUuuid;
|
||||||
use std::ffi::{c_void, CStr, CString};
|
use std::borrow::Cow;
|
||||||
|
use std::ffi::{c_void, CStr};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
use std::ptr::NonNull;
|
||||||
|
|
||||||
pub(crate) const LIBCUDA_DEFAULT_PATH: &str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";
|
pub(crate) const LIBCUDA_DEFAULT_PATH: &str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";
|
||||||
|
|
||||||
pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void {
|
pub fn dlopen_local_noredirect<'a>(
|
||||||
let libcuda_path = CString::new(libcuda_path).unwrap();
|
path: impl Into<Cow<'a, str>>,
|
||||||
libc::dlopen(
|
) -> Result<NonNull<c_void>, libloading::Error> {
|
||||||
libcuda_path.as_ptr() as *const _,
|
let lib: libloading::os::unix::Library =
|
||||||
libc::RTLD_LOCAL | libc::RTLD_NOW,
|
zluda_trace_common::dlopen_local_noredirect(path)?.into();
|
||||||
)
|
NonNull::new(lib.into_raw()).ok_or(libloading::Error::DlOpenUnknown)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
|
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
|
||||||
|
@ -1,22 +1,30 @@
|
|||||||
|
use cuda_types::cuda::CUuuid;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::os::windows::io::AsRawHandle;
|
||||||
|
use std::ptr::NonNull;
|
||||||
use std::{
|
use std::{
|
||||||
ffi::{c_void, CStr},
|
ffi::{c_void, CStr},
|
||||||
mem, ptr,
|
mem, ptr,
|
||||||
sync::LazyLock,
|
sync::LazyLock,
|
||||||
};
|
};
|
||||||
|
|
||||||
use std::os::windows::io::AsRawHandle;
|
|
||||||
use winapi::{
|
use winapi::{
|
||||||
shared::minwindef::{FARPROC, HMODULE},
|
shared::minwindef::{FARPROC, HMODULE},
|
||||||
um::debugapi::OutputDebugStringA,
|
um::debugapi::OutputDebugStringA,
|
||||||
um::libloaderapi::{GetProcAddress, LoadLibraryW},
|
um::libloaderapi::{GetProcAddress, LoadLibraryW},
|
||||||
};
|
};
|
||||||
|
|
||||||
use cuda_types::cuda::CUuuid;
|
|
||||||
|
|
||||||
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "C:\\Windows\\System32\\nvcuda.dll";
|
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = "C:\\Windows\\System32\\nvcuda.dll";
|
||||||
const LOAD_LIBRARY_NO_REDIRECT: &'static [u8] = b"ZludaLoadLibraryW_NoRedirect\0";
|
const LOAD_LIBRARY_NO_REDIRECT: &'static [u8] = b"ZludaLoadLibraryW_NoRedirect\0";
|
||||||
const GET_PROC_ADDRESS_NO_REDIRECT: &'static [u8] = b"ZludaGetProcAddress_NoRedirect\0";
|
const GET_PROC_ADDRESS_NO_REDIRECT: &'static [u8] = b"ZludaGetProcAddress_NoRedirect\0";
|
||||||
|
|
||||||
|
pub fn dlopen_local_noredirect<'a>(
|
||||||
|
path: impl Into<Cow<'a, str>>,
|
||||||
|
) -> Result<NonNull<c_void>, libloading::Error> {
|
||||||
|
let lib: libloading::os::windows::Library =
|
||||||
|
zluda_trace_common::dlopen_local_noredirect(path)?.into();
|
||||||
|
NonNull::new(lib.into_raw() as *mut _).ok_or(libloading::Error::DlOpenUnknown)
|
||||||
|
}
|
||||||
|
|
||||||
static PLATFORM_LIBRARY: LazyLock<PlatformLibrary> =
|
static PLATFORM_LIBRARY: LazyLock<PlatformLibrary> =
|
||||||
LazyLock::new(|| unsafe { PlatformLibrary::new() });
|
LazyLock::new(|| unsafe { PlatformLibrary::new() });
|
||||||
|
|
||||||
@ -73,14 +81,6 @@ impl PlatformLibrary {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void {
|
|
||||||
let libcuda_path_uf16 = libcuda_path
|
|
||||||
.encode_utf16()
|
|
||||||
.chain(std::iter::once(0))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
(PLATFORM_LIBRARY.LoadLibraryW)(libcuda_path_uf16.as_ptr()) as _
|
|
||||||
}
|
|
||||||
|
|
||||||
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
|
pub unsafe fn get_proc_address(handle: *mut c_void, func: &CStr) -> *mut c_void {
|
||||||
(PLATFORM_LIBRARY.GetProcAddress)(handle as _, func.as_ptr() as _) as _
|
(PLATFORM_LIBRARY.GetProcAddress)(handle as _, func.as_ptr() as _) as _
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ fn get_library() -> Option<Library> {
|
|||||||
let cuda_lib = std::env::var("ZLUDA_BLAS_LIB")
|
let cuda_lib = std::env::var("ZLUDA_BLAS_LIB")
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublas.so".to_string());
|
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublas.so".to_string());
|
||||||
unsafe { Library::new(cuda_lib) }.ok()
|
zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! unimplemented {
|
macro_rules! unimplemented {
|
||||||
|
@ -7,7 +7,7 @@ fn get_library() -> Option<Library> {
|
|||||||
let cuda_lib = std::env::var("ZLUDA_BLASLT_LIB")
|
let cuda_lib = std::env::var("ZLUDA_BLASLT_LIB")
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublasLt.so".to_string());
|
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcublasLt.so".to_string());
|
||||||
unsafe { Library::new(cuda_lib) }.ok()
|
zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! unimplemented {
|
macro_rules! unimplemented {
|
||||||
|
@ -11,3 +11,6 @@ cuda_types = { path = "../cuda_types" }
|
|||||||
dark_api = { path = "../dark_api" }
|
dark_api = { path = "../dark_api" }
|
||||||
format = { path = "../format" }
|
format = { path = "../format" }
|
||||||
cglue = "0.3.5"
|
cglue = "0.3.5"
|
||||||
|
|
||||||
|
[target.'cfg(not(windows))'.dependencies]
|
||||||
|
libc = "0.2"
|
||||||
|
@ -5,7 +5,7 @@ use cuda_types::{
|
|||||||
cusparse::cusparseStatus_tConsts,
|
cusparse::cusparseStatus_tConsts,
|
||||||
};
|
};
|
||||||
use dark_api::ByteVecFfi;
|
use dark_api::ByteVecFfi;
|
||||||
use std::{ffi::c_void, num::NonZero, ptr, sync::LazyLock};
|
use std::{borrow::Cow, ffi::c_void, num::NonZero, ptr, sync::LazyLock};
|
||||||
|
|
||||||
pub fn get_export_table() -> Option<::dark_api::zluda_trace::ZludaTraceInternal> {
|
pub fn get_export_table() -> Option<::dark_api::zluda_trace::ZludaTraceInternal> {
|
||||||
static CU_GET_EXPORT_TABLE: LazyLock<
|
static CU_GET_EXPORT_TABLE: LazyLock<
|
||||||
@ -38,30 +38,95 @@ fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
|||||||
os::open_driver()
|
os::open_driver()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn dlopen_local_noredirect<'a>(
|
||||||
|
path: impl Into<Cow<'a, str>>,
|
||||||
|
) -> Result<libloading::Library, libloading::Error> {
|
||||||
|
unsafe { os::dlopen_local_noredirect(path.into()) }
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
pub(crate) mod os {
|
pub(crate) mod os {
|
||||||
|
use libc::{c_char, c_int};
|
||||||
use libloading::os;
|
use libloading::os;
|
||||||
|
use std::{borrow::Cow, ffi::c_void, mem};
|
||||||
const RTLD_NOLOAD: i32 = 0x4;
|
|
||||||
|
|
||||||
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
||||||
unsafe {
|
unsafe {
|
||||||
os::unix::Library::open(Some("libcuda.so.1"), RTLD_NOLOAD | os::unix::RTLD_LAZY)
|
os::unix::Library::open(
|
||||||
.or_else(|_| {
|
Some("libcuda.so.1"),
|
||||||
os::unix::Library::open(Some("libcuda.so"), RTLD_NOLOAD | os::unix::RTLD_LAZY)
|
libc::RTLD_NOLOAD | os::unix::RTLD_LAZY,
|
||||||
})
|
)
|
||||||
.map(Into::into)
|
.or_else(|_| {
|
||||||
|
os::unix::Library::open(Some("libcuda.so"), libc::RTLD_NOLOAD | os::unix::RTLD_LAZY)
|
||||||
|
})
|
||||||
|
.map(Into::into)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub unsafe fn dlopen_local_noredirect<'a>(
|
||||||
|
path: Cow<'a, str>,
|
||||||
|
) -> Result<libloading::Library, libloading::Error> {
|
||||||
|
fn terminate_with_nul<'a>(path: Cow<'a, str>) -> Cow<'a, str> {
|
||||||
|
let path = if !path.ends_with('\0') {
|
||||||
|
let mut path = path.into_owned();
|
||||||
|
path.push('\0');
|
||||||
|
Cow::Owned(path)
|
||||||
|
} else {
|
||||||
|
path
|
||||||
|
};
|
||||||
|
path
|
||||||
|
}
|
||||||
|
let zluda_dlopen_noredirect =
|
||||||
|
unsafe { libc::dlsym(libc::RTLD_DEFAULT, c"zluda_dlopen_noredirect".as_ptr()) };
|
||||||
|
let zluda_dlopen_noredirect = mem::transmute::<
|
||||||
|
_,
|
||||||
|
Option<unsafe extern "C" fn(*const c_char, c_int) -> *mut c_void>,
|
||||||
|
>(zluda_dlopen_noredirect);
|
||||||
|
let dlopen = zluda_dlopen_noredirect.unwrap_or(libc::dlopen);
|
||||||
|
let path = terminate_with_nul(path);
|
||||||
|
Ok(libloading::os::unix::Library::from_raw(dlopen(
|
||||||
|
path.as_ptr().cast(),
|
||||||
|
os::unix::RTLD_LOCAL | os::unix::RTLD_LAZY,
|
||||||
|
))
|
||||||
|
.into())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
pub(crate) mod os {
|
pub(crate) mod os {
|
||||||
use libloading::os;
|
use libloading::os;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
pub fn open_driver() -> Result<libloading::Library, libloading::Error> {
|
||||||
os::windows::Library::open_already_loaded("nvcuda").map(Into::into)
|
os::windows::Library::open_already_loaded("nvcuda").map(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub unsafe fn dlopen_local_noredirect<'a>(
|
||||||
|
path: Cow<'a, str>,
|
||||||
|
) -> Result<libloading::Library, libloading::Error> {
|
||||||
|
fn terminate_with_nul(mut path: Vec<u16>) -> Vec<u16> {
|
||||||
|
if path.last().copied() == Some(0) {
|
||||||
|
path.push(0);
|
||||||
|
}
|
||||||
|
path
|
||||||
|
}
|
||||||
|
let driver = open_driver()?;
|
||||||
|
match driver.get::<unsafe extern "C" fn(*const u16) -> isize>(
|
||||||
|
c"ZludaLoadLibraryW_NoRedirect".to_bytes_with_nul(),
|
||||||
|
) {
|
||||||
|
Ok(load_library) => {
|
||||||
|
let symbol = load_library(
|
||||||
|
terminate_with_nul(path.encode_utf16().collect::<Vec<u16>>()).as_ptr(),
|
||||||
|
);
|
||||||
|
if symbol == 0 {
|
||||||
|
Err(libloading::Error::LoadLibraryExWUnknown)
|
||||||
|
} else {
|
||||||
|
Ok(libloading::os::windows::Library::from_raw(symbol).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => libloading::Library::new(&*path),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait ReprUsize {
|
pub trait ReprUsize {
|
||||||
|
@ -7,7 +7,7 @@ fn get_library() -> Option<Library> {
|
|||||||
let cuda_lib = std::env::var("ZLUDA_DNN_LIB")
|
let cuda_lib = std::env::var("ZLUDA_DNN_LIB")
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap_or_else(|| "/usr/lib/x86_64-linux-gnu/libcudnn.so.9".to_string());
|
.unwrap_or_else(|| "/usr/lib/x86_64-linux-gnu/libcudnn.so.9".to_string());
|
||||||
unsafe { Library::new(cuda_lib) }.ok()
|
zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! unimplemented {
|
macro_rules! unimplemented {
|
||||||
|
@ -7,7 +7,7 @@ fn get_library() -> Option<Library> {
|
|||||||
let cuda_lib = std::env::var("ZLUDA_FFT_LIB")
|
let cuda_lib = std::env::var("ZLUDA_FFT_LIB")
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcufft.so".to_string());
|
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcufft.so".to_string());
|
||||||
unsafe { Library::new(cuda_lib) }.ok()
|
zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! unimplemented {
|
macro_rules! unimplemented {
|
||||||
|
@ -7,7 +7,7 @@ fn get_library() -> Option<Library> {
|
|||||||
let cuda_lib = std::env::var("ZLUDA_SPARSE_LIB")
|
let cuda_lib = std::env::var("ZLUDA_SPARSE_LIB")
|
||||||
.ok()
|
.ok()
|
||||||
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcusparse.so".to_string());
|
.unwrap_or_else(|| "/usr/local/cuda/lib64/libcusparse.so".to_string());
|
||||||
unsafe { Library::new(cuda_lib) }.ok()
|
zluda_trace_common::dlopen_local_noredirect(cuda_lib).ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! unimplemented {
|
macro_rules! unimplemented {
|
||||||
|
Reference in New Issue
Block a user