mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-07-20 10:46:21 +03:00
Implement more CUDA driver API to enable simple cuda-samples (#405)
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -1776,6 +1776,7 @@ dependencies = [
|
|||||||
"dtor",
|
"dtor",
|
||||||
"hip_runtime-sys",
|
"hip_runtime-sys",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
"lz4-sys",
|
"lz4-sys",
|
||||||
"num_enum",
|
"num_enum",
|
||||||
"paste",
|
"paste",
|
||||||
|
@ -257,25 +257,25 @@ dark_api! {
|
|||||||
[6] = get_unknown_buffer2(ptr: *mut *mut std::ffi::c_void, size: *mut usize) -> ()
|
[6] = get_unknown_buffer2(ptr: *mut *mut std::ffi::c_void, size: *mut usize) -> ()
|
||||||
},
|
},
|
||||||
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
|
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
|
||||||
[0] = context_local_storage_ctor(
|
[0] = context_local_storage_put(
|
||||||
context: cuda_types::cuda::CUcontext,
|
context: cuda_types::cuda::CUcontext,
|
||||||
manager: *mut std::ffi::c_void, // ContextStateManager
|
key: *mut std::ffi::c_void,
|
||||||
ctx_state: *mut std::ffi::c_void, // ContextState
|
value: *mut std::ffi::c_void,
|
||||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||||
dtor_cb: Option<extern "system" fn(
|
dtor_cb: Option<extern "system" fn(
|
||||||
cuda_types::cuda::CUcontext,
|
cuda_types::cuda::CUcontext,
|
||||||
*mut std::ffi::c_void, // ContextStateManager
|
*mut std::ffi::c_void,
|
||||||
*mut std::ffi::c_void, // ContextState
|
*mut std::ffi::c_void,
|
||||||
)>
|
)>
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
[1] = context_local_storage_dtor(
|
[1] = context_local_storage_delete(
|
||||||
arg1: *mut std::ffi::c_void,
|
context: cuda_types::cuda::CUcontext,
|
||||||
arg2: *mut std::ffi::c_void
|
key: *mut std::ffi::c_void
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
[2] = context_local_storage_get_state(
|
[2] = context_local_storage_get(
|
||||||
ctx_state: *mut std::ffi::c_void, // ContextState
|
value: *mut *mut std::ffi::c_void,
|
||||||
cu_ctx: cuda_types::cuda::CUcontext,
|
cu_ctx: cuda_types::cuda::CUcontext,
|
||||||
manager: *mut std::ffi::c_void // ContextStateManager
|
key: *mut std::ffi::c_void
|
||||||
) -> cuda_types::cuda::CUresult
|
) -> cuda_types::cuda::CUresult
|
||||||
},
|
},
|
||||||
"{0CA50B8C-1004-929A-89A7-D0DF10E77286}" => CTX_CREATE_BYPASS[2] {
|
"{0CA50B8C-1004-929A-89A7-D0DF10E77286}" => CTX_CREATE_BYPASS[2] {
|
||||||
|
@ -27,6 +27,9 @@ dtor = "0.0.6"
|
|||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
winapi = { version = "0.3", features = ["heapapi", "std"] }
|
winapi = { version = "0.3", features = ["heapapi", "std"] }
|
||||||
|
|
||||||
|
[target.'cfg(not(windows))'.dependencies]
|
||||||
|
libc = "0.2"
|
||||||
|
|
||||||
[package.metadata.zluda]
|
[package.metadata.zluda]
|
||||||
linux_symlinks = [
|
linux_symlinks = [
|
||||||
"libcuda.so",
|
"libcuda.so",
|
||||||
|
@ -1,23 +1,93 @@
|
|||||||
use super::{driver, FromCuda, ZludaObject};
|
use super::{FromCuda, ZludaObject, module};
|
||||||
use cuda_types::cuda::*;
|
use cuda_types::cuda::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use rustc_hash::FxHashSet;
|
use rustc_hash::{FxHashSet, FxHashMap};
|
||||||
use std::{cell::RefCell, ptr, sync::Mutex};
|
use std::{cell::RefCell, ptr, sync::Mutex, ffi::c_void};
|
||||||
|
|
||||||
|
|
||||||
thread_local! {
|
thread_local! {
|
||||||
pub(crate) static CONTEXT_STACK: RefCell<Vec<(CUcontext, hipDevice_t)>> = RefCell::new(Vec::new());
|
pub(crate) static STACK: RefCell<Vec<(CUcontext, hipDevice_t)>> = RefCell::new(Vec::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct Context {
|
pub(crate) struct Context {
|
||||||
pub(crate) device: hipDevice_t,
|
pub(crate) device: hipDevice_t,
|
||||||
pub(crate) mutable: Mutex<OwnedByContext>,
|
pub(crate) state: Mutex<ContextState>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct OwnedByContext {
|
pub(crate) struct ContextState {
|
||||||
pub(crate) ref_count: usize, // only used by primary context
|
pub(crate) ref_count: u32,
|
||||||
pub(crate) _memory: FxHashSet<hipDeviceptr_t>,
|
pub(crate) flags: u32,
|
||||||
pub(crate) _streams: FxHashSet<hipStream_t>,
|
pub(crate) modules: FxHashSet<CUmodule>,
|
||||||
pub(crate) _modules: FxHashSet<CUmodule>,
|
pub(crate) storage: FxHashMap<usize, StorageData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct StorageData {
|
||||||
|
pub(crate) value: usize,
|
||||||
|
pub(crate) reset_cb: Option<extern "system" fn(CUcontext, *mut c_void, *mut c_void)>,
|
||||||
|
pub(crate) handle: CUcontext,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ContextState {
|
||||||
|
pub(crate) fn new() -> Self {
|
||||||
|
ContextState {
|
||||||
|
ref_count: 0,
|
||||||
|
flags: 0,
|
||||||
|
modules: FxHashSet::default(),
|
||||||
|
storage: FxHashMap::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn reset(&mut self) -> CUresult {
|
||||||
|
for (key, data) in self.storage.iter_mut() {
|
||||||
|
if let Some(_cb) = data.reset_cb {
|
||||||
|
// TODO: check that these callbacks do not call into the CUDA driver
|
||||||
|
// since this could result in a recursive mutex lock.
|
||||||
|
_cb(data.handle, *key as *mut c_void, data.value as *mut c_void);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.ref_count = 0;
|
||||||
|
self.flags = 0;
|
||||||
|
// drop all modules and return first error if any
|
||||||
|
let result = self.modules.drain().fold(
|
||||||
|
Ok(()), |res: CUresult, hmod| {
|
||||||
|
match (res, super::drop_checked::<module::Module>(hmod)) {
|
||||||
|
(Err(e), _) => Err(e),
|
||||||
|
(_, Err(e)) => Err(e),
|
||||||
|
_ => Ok(()),
|
||||||
|
}
|
||||||
|
});
|
||||||
|
self.storage.clear();
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Context {
|
||||||
|
pub(crate) fn new(device: hipDevice_t) -> Self {
|
||||||
|
Self {
|
||||||
|
device: device,
|
||||||
|
state: Mutex::new(ContextState::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn with_state(
|
||||||
|
&self,
|
||||||
|
fn_: impl FnOnce(&ContextState) -> CUresult,
|
||||||
|
) -> CUresult {
|
||||||
|
match self.state.lock() {
|
||||||
|
Ok(guard) => fn_(& *guard),
|
||||||
|
Err(_) => CUresult::ERROR_UNKNOWN,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn with_state_mut(
|
||||||
|
&self,
|
||||||
|
fn_: impl FnOnce(&mut ContextState) -> CUresult,
|
||||||
|
) -> CUresult {
|
||||||
|
match self.state.lock() {
|
||||||
|
Ok(mut guard) => fn_(&mut *guard),
|
||||||
|
Err(_) => CUresult::ERROR_UNKNOWN,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ZludaObject for Context {
|
impl ZludaObject for Context {
|
||||||
@ -30,16 +100,11 @@ impl ZludaObject for Context {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn new(device: hipDevice_t) -> Context {
|
pub(crate) fn get_current_context() -> Result<CUcontext, CUerror> {
|
||||||
Context {
|
if let Some(ctx) = STACK.with(|stack| stack.borrow().last().copied().map(|(ctx, _)| ctx)) {
|
||||||
device,
|
return Ok(ctx);
|
||||||
mutable: Mutex::new(OwnedByContext {
|
|
||||||
ref_count: 0,
|
|
||||||
_memory: FxHashSet::default(),
|
|
||||||
_streams: FxHashSet::default(),
|
|
||||||
_modules: FxHashSet::default(),
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
|
Err(CUerror::INVALID_CONTEXT)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipError_t {
|
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipError_t {
|
||||||
@ -54,14 +119,9 @@ pub(crate) fn synchronize() -> hipError_t {
|
|||||||
unsafe { hipDeviceSynchronize() }
|
unsafe { hipDeviceSynchronize() }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_primary(hip_dev: hipDevice_t) -> Result<(&'static Context, CUcontext), CUerror> {
|
|
||||||
let dev = driver::device(hip_dev)?;
|
|
||||||
Ok(dev.primary_context())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn set_current(raw_ctx: CUcontext) -> CUresult {
|
pub(crate) fn set_current(raw_ctx: CUcontext) -> CUresult {
|
||||||
let new_device = if raw_ctx.0 == ptr::null_mut() {
|
let new_device = if raw_ctx.0 == ptr::null_mut() {
|
||||||
CONTEXT_STACK.with(|stack| {
|
STACK.with(|stack| {
|
||||||
let mut stack = stack.borrow_mut();
|
let mut stack = stack.borrow_mut();
|
||||||
if let Some((_, old_device)) = stack.pop() {
|
if let Some((_, old_device)) = stack.pop() {
|
||||||
if let Some((_, new_device)) = stack.last() {
|
if let Some((_, new_device)) = stack.last() {
|
||||||
@ -75,7 +135,7 @@ pub(crate) fn set_current(raw_ctx: CUcontext) -> CUresult {
|
|||||||
} else {
|
} else {
|
||||||
let ctx: &Context = FromCuda::from_cuda(&raw_ctx)?;
|
let ctx: &Context = FromCuda::from_cuda(&raw_ctx)?;
|
||||||
let device = ctx.device;
|
let device = ctx.device;
|
||||||
CONTEXT_STACK.with(move |stack| {
|
STACK.with(move |stack| {
|
||||||
let mut stack = stack.borrow_mut();
|
let mut stack = stack.borrow_mut();
|
||||||
let last_device = stack.last().map(|(_, dev)| *dev);
|
let last_device = stack.last().map(|(_, dev)| *dev);
|
||||||
stack.push((raw_ctx, device));
|
stack.push((raw_ctx, device));
|
||||||
@ -91,3 +151,43 @@ pub(crate) fn set_current(raw_ctx: CUcontext) -> CUresult {
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_current(pctx: &mut CUcontext) -> CUresult {
|
||||||
|
match get_current_context() {
|
||||||
|
Ok(ctx) => *pctx = ctx,
|
||||||
|
Err(_) => *pctx = CUcontext(ptr::null_mut()),
|
||||||
|
}
|
||||||
|
CUresult::SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_device(dev: &mut hipDevice_t) -> CUresult {
|
||||||
|
let cu_ctx = get_current_context()?;
|
||||||
|
let ctx: &Context = FromCuda::from_cuda(&cu_ctx)?;
|
||||||
|
*dev = ctx.device;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pub(crate) unsafe fn push_current(ctx: CUcontext) -> CUresult {
|
||||||
|
if ctx == CUcontext(ptr::null_mut()) {
|
||||||
|
return CUresult::ERROR_INVALID_VALUE;
|
||||||
|
}
|
||||||
|
set_current(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn push_current_v2(ctx: CUcontext) -> CUresult {
|
||||||
|
push_current(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn pop_current(ctx: &mut CUcontext) -> CUresult {
|
||||||
|
STACK.with(|stack| {
|
||||||
|
if let Some((_ctx, _)) = stack.borrow_mut().pop() {
|
||||||
|
*ctx = _ctx;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn pop_current_v2(ctx: &mut CUcontext) -> CUresult {
|
||||||
|
pop_current(ctx)
|
||||||
|
}
|
@ -1,8 +1,7 @@
|
|||||||
use cuda_types::cuda::*;
|
use cuda_types::cuda::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use std::{mem, ptr};
|
use std::{mem, ptr};
|
||||||
|
use super::{driver, context};
|
||||||
use super::context;
|
|
||||||
|
|
||||||
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
||||||
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
||||||
@ -463,30 +462,43 @@ fn clamp_usize(x: usize) -> i32 {
|
|||||||
usize::min(x, i32::MAX as usize) as i32
|
usize::min(x, i32::MAX as usize) as i32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get_primary_context(hip_dev: hipDevice_t) -> Result<(&'static context::Context, CUcontext), CUerror> {
|
||||||
|
let dev: &'static driver::Device = driver::device(hip_dev)?;
|
||||||
|
Ok(dev.primary_context())
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn primary_context_retain(
|
pub(crate) fn primary_context_retain(
|
||||||
pctx: &mut CUcontext,
|
pctx: &mut CUcontext,
|
||||||
hip_dev: hipDevice_t,
|
hip_dev: hipDevice_t,
|
||||||
) -> Result<(), CUerror> {
|
) -> CUresult {
|
||||||
let (ctx, raw_ctx) = context::get_primary(hip_dev)?;
|
let (ctx, cu_ctx) = get_primary_context(hip_dev)?;
|
||||||
{
|
|
||||||
let mut mutable_ctx = ctx.mutable.lock().map_err(|_| CUerror::UNKNOWN)?;
|
ctx.with_state_mut(|state: &mut context::ContextState| {
|
||||||
mutable_ctx.ref_count += 1;
|
state.ref_count += 1;
|
||||||
}
|
Ok(())
|
||||||
*pctx = raw_ctx;
|
})?;
|
||||||
|
|
||||||
|
*pctx = cu_ctx;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> Result<(), CUerror> {
|
pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> CUresult {
|
||||||
let (ctx, _) = context::get_primary(hip_dev)?;
|
let (ctx, _) = get_primary_context(hip_dev)?;
|
||||||
{
|
|
||||||
let mut mutable_ctx = ctx.mutable.lock().map_err(|_| CUerror::UNKNOWN)?;
|
ctx.with_state_mut(|state| {
|
||||||
if mutable_ctx.ref_count == 0 {
|
state.ref_count -= 1;
|
||||||
return Err(CUerror::INVALID_CONTEXT);
|
if state.ref_count == 0 {
|
||||||
}
|
return state.reset();
|
||||||
mutable_ctx.ref_count -= 1;
|
|
||||||
if mutable_ctx.ref_count == 0 {
|
|
||||||
// TODO: drop all children
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
|
||||||
|
let (ctx, _) = get_primary_context(hip_dev)?;
|
||||||
|
ctx.with_state_mut(|state| {
|
||||||
|
state.reset()
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
@ -1,10 +1,10 @@
|
|||||||
use super::LiveCheck;
|
use super::{FromCuda, LiveCheck};
|
||||||
use crate::r#impl::{context, device};
|
use crate::r#impl::{context, device};
|
||||||
use comgr::Comgr;
|
use comgr::Comgr;
|
||||||
use cuda_types::cuda::*;
|
use cuda_types::cuda::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use std::{
|
use std::{
|
||||||
ffi::{CStr, CString},
|
ffi::{CStr, CString, c_void},
|
||||||
mem, ptr, slice,
|
mem, ptr, slice,
|
||||||
sync::OnceLock,
|
sync::OnceLock,
|
||||||
usize,
|
usize,
|
||||||
@ -64,7 +64,7 @@ pub(crate) fn global_state() -> Result<&'static GlobalState, CUerror> {
|
|||||||
))
|
))
|
||||||
.map_err(|_| CUerror::UNKNOWN)?
|
.map_err(|_| CUerror::UNKNOWN)?
|
||||||
.to_owned(),
|
.to_owned(),
|
||||||
primary_context: LiveCheck::new(context::new(i)),
|
primary_context: LiveCheck::new(context::Context::new(i)),
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>, _>>()?,
|
.collect::<Result<Vec<_>, _>>()?,
|
||||||
@ -162,34 +162,72 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApi {
|
|||||||
*size = UNKNOWN_BUFFER2.len();
|
*size = UNKNOWN_BUFFER2.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "system" fn context_local_storage_ctor(
|
unsafe extern "system" fn context_local_storage_put(
|
||||||
context: cuda_types::cuda::CUcontext,
|
cu_ctx: CUcontext,
|
||||||
manager: *mut std::ffi::c_void,
|
key: *mut c_void,
|
||||||
ctx_state: *mut std::ffi::c_void,
|
value: *mut c_void,
|
||||||
dtor_cb: Option<
|
dtor_cb: Option<
|
||||||
extern "system" fn(
|
extern "system" fn(
|
||||||
cuda_types::cuda::CUcontext,
|
CUcontext,
|
||||||
*mut std::ffi::c_void,
|
*mut c_void,
|
||||||
*mut std::ffi::c_void,
|
*mut c_void,
|
||||||
),
|
),
|
||||||
>,
|
>,
|
||||||
) -> cuda_types::cuda::CUresult {
|
) -> CUresult {
|
||||||
todo!()
|
let _ctx = if cu_ctx.0 != ptr::null_mut() {
|
||||||
|
cu_ctx
|
||||||
|
} else {
|
||||||
|
let mut current_ctx: CUcontext = CUcontext(ptr::null_mut());
|
||||||
|
context::get_current(&mut current_ctx)?;
|
||||||
|
current_ctx
|
||||||
|
};
|
||||||
|
let ctx_obj: &context::Context = FromCuda::from_cuda(&_ctx)?;
|
||||||
|
ctx_obj.with_state_mut(|state: &mut context::ContextState| {
|
||||||
|
state.storage.insert(
|
||||||
|
key as usize,
|
||||||
|
context::StorageData {
|
||||||
|
value: value as usize,
|
||||||
|
reset_cb: dtor_cb,
|
||||||
|
handle: _ctx,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "system" fn context_local_storage_dtor(
|
unsafe extern "system" fn context_local_storage_delete(
|
||||||
arg1: *mut std::ffi::c_void,
|
cu_ctx: CUcontext,
|
||||||
arg2: *mut std::ffi::c_void,
|
key: *mut c_void,
|
||||||
) -> cuda_types::cuda::CUresult {
|
) -> CUresult {
|
||||||
todo!()
|
let ctx_obj: &context::Context = FromCuda::from_cuda(&cu_ctx)?;
|
||||||
|
ctx_obj.with_state_mut(|state: &mut context::ContextState| {
|
||||||
|
state.storage.remove(&(key as usize));
|
||||||
|
Ok(())
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "system" fn context_local_storage_get_state(
|
unsafe extern "system" fn context_local_storage_get(
|
||||||
ctx_state: *mut std::ffi::c_void,
|
value: *mut *mut c_void,
|
||||||
cu_ctx: cuda_types::cuda::CUcontext,
|
cu_ctx: CUcontext,
|
||||||
manager: *mut std::ffi::c_void,
|
key: *mut c_void,
|
||||||
) -> cuda_types::cuda::CUresult {
|
) -> CUresult {
|
||||||
todo!()
|
let mut _ctx: CUcontext;
|
||||||
|
if cu_ctx.0 == ptr::null_mut() {
|
||||||
|
_ctx = context::get_current_context()?;
|
||||||
|
} else {
|
||||||
|
_ctx = cu_ctx
|
||||||
|
};
|
||||||
|
let ctx_obj: &context::Context = FromCuda::from_cuda(&_ctx)?;
|
||||||
|
ctx_obj.with_state(|state: &context::ContextState| {
|
||||||
|
match state.storage.get(&(key as usize)) {
|
||||||
|
Some(data) => *value = data.value as *mut c_void,
|
||||||
|
None => return CUresult::ERROR_INVALID_HANDLE
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "system" fn ctx_create_v2_bypass(
|
unsafe extern "system" fn ctx_create_v2_bypass(
|
||||||
@ -268,7 +306,8 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApi {
|
|||||||
result1: *mut u32,
|
result1: *mut u32,
|
||||||
result2: *mut *const std::ffi::c_void,
|
result2: *mut *const std::ffi::c_void,
|
||||||
) -> cuda_types::cuda::CUresult {
|
) -> cuda_types::cuda::CUresult {
|
||||||
todo!()
|
*result1 = 0;
|
||||||
|
CUresult::SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe extern "system" fn check_fn3() -> u32 {
|
unsafe extern "system" fn check_fn3() -> u32 {
|
||||||
@ -383,3 +422,11 @@ pub(crate) unsafe fn get_proc_address_v2(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn profiler_start() -> CUresult {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn profiler_stop() -> CUresult {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
@ -1,585 +0,0 @@
|
|||||||
use hip_runtime_sys::{
|
|
||||||
hipCtxCreate, hipDevicePrimaryCtxGetState, hipDevicePrimaryCtxRelease,
|
|
||||||
hipDevicePrimaryCtxRetain, hipError_t,
|
|
||||||
};
|
|
||||||
use ocl_core::ffi::c_uchar;
|
|
||||||
|
|
||||||
use crate::r#impl;
|
|
||||||
|
|
||||||
use crate::cuda::CUresult;
|
|
||||||
use crate::r#impl::os;
|
|
||||||
use crate::{
|
|
||||||
cuda::{CUcontext, CUdevice, CUmodule, CUuuid},
|
|
||||||
cuda_impl,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::{device, module, Decuda, Encuda};
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::os::raw::{c_uint, c_ulong, c_ushort};
|
|
||||||
use std::{
|
|
||||||
ffi::{c_void, CStr},
|
|
||||||
ptr,
|
|
||||||
};
|
|
||||||
use std::{mem, os::raw::c_int};
|
|
||||||
|
|
||||||
pub fn get(table: *mut *const std::os::raw::c_void, id: *const CUuuid) -> CUresult {
|
|
||||||
if table == ptr::null_mut() || id == ptr::null_mut() {
|
|
||||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
let id = unsafe { *id };
|
|
||||||
match id {
|
|
||||||
TOOLS_RUNTIME_CALLBACK_HOOKS_GUID => {
|
|
||||||
unsafe { *table = TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE.as_ptr() as *const _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
CUDART_INTERFACE_GUID => {
|
|
||||||
unsafe { *table = CUDART_INTERFACE_VTABLE.as_ptr() as *const _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
TOOLS_TLS_GUID => {
|
|
||||||
unsafe { *table = 1 as _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID => {
|
|
||||||
unsafe { *table = CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE.as_ptr() as *const _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
CTX_CREATE_BYPASS_GUID => {
|
|
||||||
unsafe { *table = CTX_CREATE_BYPASS_VTABLE.as_ptr() as *const _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
HEAP_ACCESS_GUID => {
|
|
||||||
unsafe { *table = HEAP_ACCESS_VTABLE.as_ptr() as *const _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
DEVICE_EXTENDED_RT_GUID => {
|
|
||||||
unsafe { *table = DEVICE_EXTENDED_RT_VTABLE.as_ptr() as *const _ };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
_ => CUresult::CUDA_ERROR_NOT_SUPPORTED,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const TOOLS_RUNTIME_CALLBACK_HOOKS_GUID: CUuuid = CUuuid {
|
|
||||||
bytes: [
|
|
||||||
0xa0, 0x94, 0x79, 0x8c, 0x2e, 0x74, 0x2e, 0x74, 0x93, 0xf2, 0x08, 0x00, 0x20, 0x0c, 0x0a,
|
|
||||||
0x66,
|
|
||||||
],
|
|
||||||
};
|
|
||||||
#[repr(C)]
|
|
||||||
union VTableEntry {
|
|
||||||
ptr: *const (),
|
|
||||||
length: usize,
|
|
||||||
}
|
|
||||||
unsafe impl Sync for VTableEntry {}
|
|
||||||
const TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH: usize = 7;
|
|
||||||
static TOOLS_RUNTIME_CALLBACK_HOOKS_VTABLE: [VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH] = [
|
|
||||||
VTableEntry {
|
|
||||||
length: mem::size_of::<[VTableEntry; TOOLS_RUNTIME_CALLBACK_HOOKS_LENGTH]>(),
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry {
|
|
||||||
ptr: runtime_callback_hooks_fn1 as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry {
|
|
||||||
ptr: runtime_callback_hooks_fn5 as *const (),
|
|
||||||
},
|
|
||||||
];
|
|
||||||
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE: [usize; 512] = [0; 512];
|
|
||||||
|
|
||||||
unsafe extern "system" fn runtime_callback_hooks_fn1(ptr: *mut *mut usize, size: *mut usize) {
|
|
||||||
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.as_mut_ptr();
|
|
||||||
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN1_SPACE.len();
|
|
||||||
}
|
|
||||||
|
|
||||||
static mut TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE: [u8; 2] = [0; 2];
|
|
||||||
|
|
||||||
unsafe extern "system" fn runtime_callback_hooks_fn5(
|
|
||||||
ptr: *mut *mut u8,
|
|
||||||
size: *mut usize,
|
|
||||||
) -> *mut u8 {
|
|
||||||
*ptr = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
|
|
||||||
*size = TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.len();
|
|
||||||
return TOOLS_RUNTIME_CALLBACK_HOOKS_FN5_SPACE.as_mut_ptr();
|
|
||||||
}
|
|
||||||
|
|
||||||
const CUDART_INTERFACE_GUID: CUuuid = CUuuid {
|
|
||||||
bytes: [
|
|
||||||
0x6b, 0xd5, 0xfb, 0x6c, 0x5b, 0xf4, 0xe7, 0x4a, 0x89, 0x87, 0xd9, 0x39, 0x12, 0xfd, 0x9d,
|
|
||||||
0xf9,
|
|
||||||
],
|
|
||||||
};
|
|
||||||
|
|
||||||
const CUDART_INTERFACE_LENGTH: usize = 10;
|
|
||||||
static CUDART_INTERFACE_VTABLE: [VTableEntry; CUDART_INTERFACE_LENGTH] = [
|
|
||||||
VTableEntry {
|
|
||||||
length: mem::size_of::<[VTableEntry; CUDART_INTERFACE_LENGTH]>(),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: get_module_from_cubin as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: cudart_interface_fn1 as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry {
|
|
||||||
ptr: get_module_from_cubin_ext1 as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: cudart_interface_fn6 as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: get_module_from_cubin_ext2 as _,
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
];
|
|
||||||
|
|
||||||
unsafe extern "system" fn cudart_interface_fn1(pctx: *mut CUcontext, dev: CUdevice) -> CUresult {
|
|
||||||
cudart_interface_fn1_impl(pctx, dev.0).into()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cudart_interface_fn1_impl(pctx: *mut CUcontext, dev: c_int) -> hipError_t {
|
|
||||||
let mut hip_ctx = ptr::null_mut();
|
|
||||||
let err = unsafe { hipDevicePrimaryCtxRetain(&mut hip_ctx, dev) };
|
|
||||||
if err != hipError_t::hipSuccess {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
let err = unsafe { hipDevicePrimaryCtxRelease(dev) };
|
|
||||||
if err != hipError_t::hipSuccess {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
unsafe { *pctx = hip_ctx as _ };
|
|
||||||
hipError_t::hipSuccess
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const FATBINC_MAGIC: c_uint = 0x466243B1;
|
|
||||||
const FATBINC_VERSION: c_uint = 0x1;
|
|
||||||
|
|
||||||
#[repr(C)]
|
|
||||||
struct FatbincWrapper {
|
|
||||||
magic: c_uint,
|
|
||||||
version: c_uint,
|
|
||||||
data: *const FatbinHeader,
|
|
||||||
filename_or_fatbins: *const c_void,
|
|
||||||
}
|
|
||||||
|
|
||||||
const FATBIN_MAGIC: c_uint = 0xBA55ED50;
|
|
||||||
const FATBIN_VERSION: c_ushort = 0x01;
|
|
||||||
|
|
||||||
#[repr(C, align(8))]
|
|
||||||
struct FatbinHeader {
|
|
||||||
magic: c_uint,
|
|
||||||
version: c_ushort,
|
|
||||||
header_size: c_ushort,
|
|
||||||
files_size: c_ulong, // excluding frame header, size of all blocks framed by this frame
|
|
||||||
}
|
|
||||||
|
|
||||||
const FATBIN_FILE_HEADER_KIND_PTX: c_ushort = 0x01;
|
|
||||||
const FATBIN_FILE_HEADER_VERSION_CURRENT: c_ushort = 0x101;
|
|
||||||
|
|
||||||
// assembly file header is a bit different, but we don't care
|
|
||||||
#[repr(C)]
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct FatbinFileHeader {
|
|
||||||
kind: c_ushort,
|
|
||||||
version: c_ushort,
|
|
||||||
header_size: c_uint,
|
|
||||||
padded_payload_size: c_uint,
|
|
||||||
unknown0: c_uint, // check if it's written into separately
|
|
||||||
payload_size: c_uint,
|
|
||||||
unknown1: c_uint,
|
|
||||||
unknown2: c_uint,
|
|
||||||
sm_version: c_uint,
|
|
||||||
bit_width: c_uint,
|
|
||||||
unknown3: c_uint,
|
|
||||||
unknown4: c_ulong,
|
|
||||||
unknown5: c_ulong,
|
|
||||||
uncompressed_payload: c_ulong,
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe extern "system" fn get_module_from_cubin(
|
|
||||||
result: *mut CUmodule,
|
|
||||||
fatbinc_wrapper: *const FatbincWrapper,
|
|
||||||
) -> CUresult {
|
|
||||||
if result == ptr::null_mut()
|
|
||||||
|| (*fatbinc_wrapper).magic != FATBINC_MAGIC
|
|
||||||
|| (*fatbinc_wrapper).version != FATBINC_VERSION
|
|
||||||
{
|
|
||||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
let fatbin_header = (*fatbinc_wrapper).data;
|
|
||||||
get_module_from_cubin_unwrapped(fatbin_header, result)
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe extern "system" fn get_module_from_cubin_unwrapped(
|
|
||||||
fatbin_header: *const FatbinHeader,
|
|
||||||
result: *mut CUmodule,
|
|
||||||
) -> CUresult {
|
|
||||||
if (*fatbin_header).magic != FATBIN_MAGIC || (*fatbin_header).version != FATBIN_VERSION {
|
|
||||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
let file = (fatbin_header as *const u8).add((*fatbin_header).header_size as usize);
|
|
||||||
let end = file.add((*fatbin_header).files_size as usize);
|
|
||||||
let mut ptx_files = get_ptx_files(file, end);
|
|
||||||
ptx_files.sort_unstable_by_key(|f| c_uint::max_value() - (**f).sm_version);
|
|
||||||
for file in ptx_files {
|
|
||||||
let kernel_text = match decompress_kernel_module(file) {
|
|
||||||
None => continue,
|
|
||||||
Some(vec) => vec,
|
|
||||||
};
|
|
||||||
let kernel_text_string = match CStr::from_bytes_with_nul(&kernel_text) {
|
|
||||||
Ok(c_str) => match c_str.to_str() {
|
|
||||||
Ok(s) => s,
|
|
||||||
Err(_) => continue,
|
|
||||||
},
|
|
||||||
Err(_) => continue,
|
|
||||||
};
|
|
||||||
let module = module::SpirvModule::new(kernel_text_string);
|
|
||||||
match module {
|
|
||||||
Ok(module) => {
|
|
||||||
match module::load_data_impl(result, module) {
|
|
||||||
Ok(()) => {}
|
|
||||||
Err(err) => return err.into(),
|
|
||||||
}
|
|
||||||
return CUresult::CUDA_SUCCESS;
|
|
||||||
}
|
|
||||||
Err(_) => continue,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CUresult::CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe extern "system" fn get_module_from_cubin_ext1(
|
|
||||||
result: *mut CUmodule,
|
|
||||||
fatbinc_wrapper: *const FatbincWrapper,
|
|
||||||
ptr1: *mut c_void,
|
|
||||||
ptr2: *mut c_void,
|
|
||||||
_unknown: usize,
|
|
||||||
) -> CUresult {
|
|
||||||
// Not sure what those two parameters are actually used for,
|
|
||||||
// they are somehow involved in __cudaRegisterHostVar
|
|
||||||
if ptr1 != ptr::null_mut() || ptr2 != ptr::null_mut() {
|
|
||||||
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
|
||||||
} else {
|
|
||||||
get_module_from_cubin(result, fatbinc_wrapper)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn get_ptx_files(file: *const u8, end: *const u8) -> Vec<*const FatbinFileHeader> {
|
|
||||||
let mut index = file;
|
|
||||||
let mut result = Vec::new();
|
|
||||||
while index < end {
|
|
||||||
let file = index as *const FatbinFileHeader;
|
|
||||||
if (*file).kind == FATBIN_FILE_HEADER_KIND_PTX
|
|
||||||
&& (*file).version == FATBIN_FILE_HEADER_VERSION_CURRENT
|
|
||||||
{
|
|
||||||
result.push(file)
|
|
||||||
}
|
|
||||||
index = index.add((*file).header_size as usize + (*file).padded_payload_size as usize);
|
|
||||||
}
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
const MAX_PTX_MODULE_DECOMPRESSION_BOUND: usize = 16 * 1024 * 1024;
|
|
||||||
|
|
||||||
unsafe fn decompress_kernel_module(file: *const FatbinFileHeader) -> Option<Vec<u8>> {
|
|
||||||
let decompressed_size = usize::max(1024, (*file).uncompressed_payload as usize);
|
|
||||||
let mut decompressed_vec = vec![0u8; decompressed_size];
|
|
||||||
loop {
|
|
||||||
match lz4_sys::LZ4_decompress_safe(
|
|
||||||
(file as *const u8).add((*file).header_size as usize) as *const _,
|
|
||||||
decompressed_vec.as_mut_ptr() as *mut _,
|
|
||||||
(*file).payload_size as c_int,
|
|
||||||
decompressed_vec.len() as c_int,
|
|
||||||
) {
|
|
||||||
error if error < 0 => {
|
|
||||||
let new_size = decompressed_vec.len() * 2;
|
|
||||||
if new_size > MAX_PTX_MODULE_DECOMPRESSION_BOUND {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
decompressed_vec.resize(decompressed_vec.len() * 2, 0);
|
|
||||||
}
|
|
||||||
real_decompressed_size => {
|
|
||||||
decompressed_vec.truncate(real_decompressed_size as usize);
|
|
||||||
if decompressed_vec.last().copied().unwrap_or(1) != 0 {
|
|
||||||
decompressed_vec.push(0);
|
|
||||||
}
|
|
||||||
return Some(decompressed_vec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe extern "system" fn cudart_interface_fn6(_: usize) -> CUresult {
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
// From the assembly looks like arg5 is a count of something in arg3 and arg4
|
|
||||||
unsafe extern "system" fn get_module_from_cubin_ext2(
|
|
||||||
fatbinc_wrapper: *const FatbinHeader,
|
|
||||||
result: *mut CUmodule,
|
|
||||||
_arg3: usize,
|
|
||||||
_arg4: usize,
|
|
||||||
arg5: c_uint,
|
|
||||||
) -> CUresult {
|
|
||||||
if arg5 != 0 {
|
|
||||||
CUresult::CUDA_ERROR_NOT_SUPPORTED
|
|
||||||
} else {
|
|
||||||
get_module_from_cubin_unwrapped(fatbinc_wrapper, result)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const TOOLS_TLS_GUID: CUuuid = CUuuid {
|
|
||||||
bytes: [
|
|
||||||
0x42, 0xd8, 0x5a, 0x81, 0x23, 0xf6, 0xcb, 0x47, 0x82, 0x98, 0xf6, 0xe7, 0x8a, 0x3a, 0xec,
|
|
||||||
0xdc,
|
|
||||||
],
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Identifier of the context-local-storage export table.
///
/// Raw bytes, in order: `C693336E-1121-DF11-A8C3-68F355D89593`.
const CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_GUID: CUuuid = CUuuid {
    bytes: [
        0xc6, 0x93, 0x33, 0x6e, 0x11, 0x21, 0xdf, 0x11, //
        0xa8, 0xc3, 0x68, 0xf3, 0x55, 0xd8, 0x95, 0x93,
    ],
};
|
|
||||||
|
|
||||||
// the table is much bigger and starts earlier
|
|
||||||
static CONTEXT_LOCAL_STORAGE_INTERFACE_V0301_VTABLE: [VTableEntry; 4] = [
|
|
||||||
VTableEntry {
|
|
||||||
ptr: context_local_storage_ctor as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: context_local_storage_dtor as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: context_local_storage_get_state as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
];
|
|
||||||
|
|
||||||
// some kind of ctor
|
|
||||||
unsafe extern "system" fn context_local_storage_ctor(
|
|
||||||
cu_ctx: CUcontext, // always zero
|
|
||||||
mgr: *mut cuda_impl::rt::ContextStateManager,
|
|
||||||
ctx_state: *mut cuda_impl::rt::ContextState,
|
|
||||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
|
||||||
dtor_cb: Option<
|
|
||||||
extern "system" fn(
|
|
||||||
CUcontext,
|
|
||||||
*mut cuda_impl::rt::ContextStateManager,
|
|
||||||
*mut cuda_impl::rt::ContextState,
|
|
||||||
),
|
|
||||||
>,
|
|
||||||
) -> CUresult {
|
|
||||||
context_local_storage_ctor_impl(cu_ctx, mgr, ctx_state, dtor_cb);
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ContextRuntimeData {
|
|
||||||
ctx_state: *mut cuda_impl::rt::ContextState,
|
|
||||||
state_mgr: *mut cuda_impl::rt::ContextStateManager,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Lazily created map from `CUcontext` to the runtime data registered for it.
/// Filled by `context_local_storage_ctor_impl`, read by
/// `context_local_storage_get_state_impl`.
// NOTE(review): this is a `static mut` with no synchronization visible in this file;
// confirm that callers never touch it from several threads at once.
static mut PRIVATE_CONTEXT_RUNTIME_DATA: Option<HashMap<CUcontext, ContextRuntimeData>> = None;
|
|
||||||
|
|
||||||
fn context_local_storage_ctor_impl(
|
|
||||||
cu_ctx: CUcontext,
|
|
||||||
state_mgr: *mut cuda_impl::rt::ContextStateManager,
|
|
||||||
ctx_state: *mut cuda_impl::rt::ContextState,
|
|
||||||
dtor_cb: Option<
|
|
||||||
extern "system" fn(
|
|
||||||
CUcontext,
|
|
||||||
*mut cuda_impl::rt::ContextStateManager,
|
|
||||||
*mut cuda_impl::rt::ContextState,
|
|
||||||
),
|
|
||||||
>,
|
|
||||||
) {
|
|
||||||
let map = unsafe { PRIVATE_CONTEXT_RUNTIME_DATA.get_or_insert_with(|| HashMap::new()) };
|
|
||||||
map.insert(
|
|
||||||
cu_ctx,
|
|
||||||
ContextRuntimeData {
|
|
||||||
ctx_state,
|
|
||||||
state_mgr,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Slot 1 of the context-local-storage table: some kind of destructor.
///
/// Currently a no-op that ignores both arguments and returns 0.
unsafe extern "system" fn context_local_storage_dtor(_arg1: *mut usize, _arg2: *mut ()) -> u32 {
    0
}
|
|
||||||
|
|
||||||
unsafe extern "system" fn context_local_storage_get_state(
|
|
||||||
ctx_state: *mut *mut cuda_impl::rt::ContextState,
|
|
||||||
cu_ctx: CUcontext,
|
|
||||||
state_mgr: *mut cuda_impl::rt::ContextStateManager,
|
|
||||||
) -> CUresult {
|
|
||||||
context_local_storage_get_state_impl(ctx_state, cu_ctx, state_mgr).encuda()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn context_local_storage_get_state_impl(
|
|
||||||
ctx_state: *mut *mut cuda_impl::rt::ContextState,
|
|
||||||
cu_ctx: CUcontext,
|
|
||||||
_: *mut cuda_impl::rt::ContextStateManager,
|
|
||||||
) -> CUresult {
|
|
||||||
match unsafe {
|
|
||||||
PRIVATE_CONTEXT_RUNTIME_DATA
|
|
||||||
.as_ref()
|
|
||||||
.and_then(|map| map.get(&cu_ctx))
|
|
||||||
} {
|
|
||||||
Some(val) => {
|
|
||||||
unsafe { *ctx_state = val.ctx_state };
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
None => CUresult::CUDA_ERROR_INVALID_VALUE,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Identifier of the "context create bypass" export table.
///
/// Raw bytes, in order: `0CA50B8C-1004-929A-89A7-D0DF10E77286`.
const CTX_CREATE_BYPASS_GUID: CUuuid = CUuuid {
    bytes: [
        0x0c, 0xa5, 0x0b, 0x8c, 0x10, 0x04, 0x92, 0x9a, //
        0x89, 0xa7, 0xd0, 0xdf, 0x10, 0xe7, 0x72, 0x86,
    ],
};
|
|
||||||
|
|
||||||
const CTX_CREATE_BYPASS_LENGTH: usize = 2;
|
|
||||||
static CTX_CREATE_BYPASS_VTABLE: [VTableEntry; CTX_CREATE_BYPASS_LENGTH] = [
|
|
||||||
VTableEntry {
|
|
||||||
length: mem::size_of::<[VTableEntry; CTX_CREATE_BYPASS_LENGTH]>(),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: ctx_create_v2_bypass as *const (),
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
// I have no idea what is the difference between this function and
|
|
||||||
// cuCtxCreate_v2, but PhysX uses both interchangeably
|
|
||||||
extern "system" fn ctx_create_v2_bypass(
|
|
||||||
pctx: *mut CUcontext,
|
|
||||||
flags: ::std::os::raw::c_uint,
|
|
||||||
dev: CUdevice,
|
|
||||||
) -> CUresult {
|
|
||||||
unsafe { hipCtxCreate(pctx as _, flags, dev.0).into() }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Identifier of the "heap access" export table.
///
/// Raw bytes, in order: `195BCBF4-D67D-024A-ACC5-1D29CEA631AE`.
const HEAP_ACCESS_GUID: CUuuid = CUuuid {
    bytes: [
        0x19, 0x5b, 0xcb, 0xf4, 0xd6, 0x7d, 0x02, 0x4a, //
        0xac, 0xc5, 0x1d, 0x29, 0xce, 0xa6, 0x31, 0xae,
    ],
};
|
|
||||||
|
|
||||||
/// C-layout record exchanged through the heap-access entry points (`heap_alloc`,
/// `heap_free`).
// NOTE(review): field meanings are not established by the visible code; the names only
// mirror the argument names of `heap_alloc`.
#[repr(C)]
struct HeapAllocRecord {
    arg1: usize,
    arg2: usize,
    _unknown: usize,
    global_heap: *mut c_void,
}
|
|
||||||
|
|
||||||
const HEAP_ACCESS_LENGTH: usize = 3;
|
|
||||||
static HEAP_ACCESS_VTABLE: [VTableEntry; HEAP_ACCESS_LENGTH] = [
|
|
||||||
VTableEntry {
|
|
||||||
length: mem::size_of::<[VTableEntry; HEAP_ACCESS_LENGTH]>(),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: heap_alloc as *const (),
|
|
||||||
},
|
|
||||||
VTableEntry {
|
|
||||||
ptr: heap_free as *const (),
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
// TODO: reverse and implement for Linux
|
|
||||||
unsafe extern "system" fn heap_alloc(
|
|
||||||
halloc_ptr: *mut *const HeapAllocRecord,
|
|
||||||
arg1: usize,
|
|
||||||
arg2: usize,
|
|
||||||
) -> CUresult {
|
|
||||||
r#impl::unimplemented()
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: reverse and implement for Linux
|
|
||||||
unsafe extern "system" fn heap_free(halloc: *mut HeapAllocRecord, arg1: *mut usize) -> CUresult {
|
|
||||||
r#impl::unimplemented()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Identifier of the "device extended RT" export table.
///
/// Raw bytes, in order: `B10541E1-F7C7-C74A-9F64-F223BE99F1E2`.
const DEVICE_EXTENDED_RT_GUID: CUuuid = CUuuid {
    bytes: [
        0xb1, 0x05, 0x41, 0xe1, 0xf7, 0xc7, 0xc7, 0x4a, //
        0x9f, 0x64, 0xf2, 0x23, 0xbe, 0x99, 0xf1, 0xe2,
    ],
};
|
|
||||||
const DEVICE_EXTENDED_RT_LENGTH: usize = 21;
|
|
||||||
static DEVICE_EXTENDED_RT_VTABLE: [VTableEntry; DEVICE_EXTENDED_RT_LENGTH] = [
|
|
||||||
VTableEntry {
|
|
||||||
length: mem::size_of::<[VTableEntry; DEVICE_EXTENDED_RT_LENGTH]>(),
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry {
|
|
||||||
ptr: device_get_attribute_ext as _,
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry {
|
|
||||||
ptr: device_get_something as _,
|
|
||||||
},
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
VTableEntry { ptr: ptr::null() },
|
|
||||||
];
|
|
||||||
|
|
||||||
unsafe extern "system" fn device_get_attribute_ext(
|
|
||||||
_dev: CUdevice,
|
|
||||||
attribute: c_uint,
|
|
||||||
unknown: c_int,
|
|
||||||
result: *mut [usize; 2],
|
|
||||||
) -> CUresult {
|
|
||||||
if result == ptr::null_mut() {
|
|
||||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
if unknown != 0 {
|
|
||||||
return CUresult::CUDA_ERROR_UNKNOWN;
|
|
||||||
}
|
|
||||||
// TODO: make real implementation
|
|
||||||
// Optix checks this probably toto know if HW RT is available
|
|
||||||
if attribute == 0x20000001 {
|
|
||||||
(&mut *result)[0] = 2;
|
|
||||||
(&mut *result)[1] = 0x130; // GTX 1080
|
|
||||||
} else if attribute == 0x20000002 {
|
|
||||||
(&mut *result)[0] = 2;
|
|
||||||
(&mut *result)[1] = 0x138; // GTX 1080
|
|
||||||
} else {
|
|
||||||
return CUresult::CUDA_ERROR_NOT_SUPPORTED;
|
|
||||||
}
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
||||||
|
|
||||||
// I don't know is this function return,
|
|
||||||
// but on my GTX 1060 it returns 0
|
|
||||||
unsafe extern "system" fn device_get_something(result: *mut c_uchar, _dev: CUdevice) -> CUresult {
|
|
||||||
if result == ptr::null_mut() {
|
|
||||||
return CUresult::CUDA_ERROR_INVALID_VALUE;
|
|
||||||
}
|
|
||||||
*result = 0;
|
|
||||||
CUresult::CUDA_SUCCESS
|
|
||||||
}
|
|
@ -1,7 +1,6 @@
|
|||||||
use super::module;
|
use crate::r#impl::driver;
|
||||||
|
|
||||||
use super::ZludaObject;
|
|
||||||
|
|
||||||
|
use super::{module, ZludaObject, FromCuda};
|
||||||
use cuda_types::cuda::*;
|
use cuda_types::cuda::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
|
|
||||||
@ -36,3 +35,16 @@ pub(crate) fn load_data(
|
|||||||
*library = Library { base: hip_module }.wrap();
|
*library = Library { base: hip_module }.wrap();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn unload(library: CUlibrary) -> CUresult {
|
||||||
|
super::drop_checked::<Library>(library)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn get_module(
|
||||||
|
out: &mut CUmodule,
|
||||||
|
library: CUlibrary,
|
||||||
|
) -> CUresult {
|
||||||
|
let lib: &Library = FromCuda::from_cuda(&library)?;
|
||||||
|
*out = module::Module{base: lib.base}.wrap();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
@ -45,3 +45,15 @@ pub(crate) fn set_d8_v2(dst: hipDeviceptr_t, value: ::core::ffi::c_uchar, n: usi
|
|||||||
pub(crate) fn get_info_v2(free: *mut usize, total: *mut usize) -> hipError_t {
|
pub(crate) fn get_info_v2(free: *mut usize, total: *mut usize) -> hipError_t {
|
||||||
unsafe { hipMemGetInfo(free, total) }
|
unsafe { hipMemGetInfo(free, total) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn free_host(ptr: *mut ::core::ffi::c_void) -> hipError_t {
|
||||||
|
hipFreeHost(ptr)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn host_alloc(
|
||||||
|
pp: *mut *mut ::core::ffi::c_void,
|
||||||
|
bytesize: usize,
|
||||||
|
flags: ::std::os::raw::c_uint,
|
||||||
|
) -> hipError_t {
|
||||||
|
hipHostMalloc(pp, bytesize, flags)
|
||||||
|
}
|
||||||
|
@ -137,7 +137,6 @@ from_cuda_nop!(
|
|||||||
CUdevice_attribute,
|
CUdevice_attribute,
|
||||||
CUdriverProcAddressQueryResult,
|
CUdriverProcAddressQueryResult,
|
||||||
CUjit_option,
|
CUjit_option,
|
||||||
CUlibrary,
|
|
||||||
CUlibraryOption,
|
CUlibraryOption,
|
||||||
CUmoduleLoadingMode,
|
CUmoduleLoadingMode,
|
||||||
CUuuid
|
CUuuid
|
||||||
@ -150,7 +149,7 @@ from_cuda_transmute!(
|
|||||||
CUpointer_attribute => hipPointer_attribute,
|
CUpointer_attribute => hipPointer_attribute,
|
||||||
CUdeviceptr_v2 => hipDeviceptr_t
|
CUdeviceptr_v2 => hipDeviceptr_t
|
||||||
);
|
);
|
||||||
from_cuda_object!(module::Module, context::Context);
|
from_cuda_object!(module::Module, context::Context, library::Library);
|
||||||
|
|
||||||
impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
|
impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
|
||||||
fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> {
|
fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> {
|
||||||
|
@ -8,7 +8,7 @@ use hip_runtime_sys::*;
|
|||||||
use std::{ffi::CStr, mem};
|
use std::{ffi::CStr, mem};
|
||||||
|
|
||||||
pub(crate) struct Module {
|
pub(crate) struct Module {
|
||||||
base: hipModule_t,
|
pub(crate) base: hipModule_t,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ZludaObject for Module {
|
impl ZludaObject for Module {
|
||||||
@ -89,6 +89,7 @@ pub(crate) fn load_data(module: &mut CUmodule, image: &std::ffi::c_void) -> CUre
|
|||||||
|
|
||||||
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
||||||
super::drop_checked::<Module>(hmod)
|
super::drop_checked::<Module>(hmod)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_function(
|
pub(crate) fn get_function(
|
||||||
|
@ -63,8 +63,14 @@ cuda_base::cuda_function_declarations!(
|
|||||||
implemented <= [
|
implemented <= [
|
||||||
cuCtxGetLimit,
|
cuCtxGetLimit,
|
||||||
cuCtxSetCurrent,
|
cuCtxSetCurrent,
|
||||||
|
cuCtxGetCurrent,
|
||||||
|
cuCtxGetDevice,
|
||||||
cuCtxSetLimit,
|
cuCtxSetLimit,
|
||||||
cuCtxSynchronize,
|
cuCtxSynchronize,
|
||||||
|
cuCtxPushCurrent,
|
||||||
|
cuCtxPushCurrent_v2,
|
||||||
|
cuCtxPopCurrent,
|
||||||
|
cuCtxPopCurrent_v2,
|
||||||
cuDeviceComputeCapability,
|
cuDeviceComputeCapability,
|
||||||
cuDeviceGet,
|
cuDeviceGet,
|
||||||
cuDeviceGetAttribute,
|
cuDeviceGetAttribute,
|
||||||
@ -76,6 +82,7 @@ cuda_base::cuda_function_declarations!(
|
|||||||
cuDeviceGetUuid_v2,
|
cuDeviceGetUuid_v2,
|
||||||
cuDevicePrimaryCtxRelease,
|
cuDevicePrimaryCtxRelease,
|
||||||
cuDevicePrimaryCtxRetain,
|
cuDevicePrimaryCtxRetain,
|
||||||
|
cuDevicePrimaryCtxReset,
|
||||||
cuDeviceTotalMem_v2,
|
cuDeviceTotalMem_v2,
|
||||||
cuDriverGetVersion,
|
cuDriverGetVersion,
|
||||||
cuFuncGetAttribute,
|
cuFuncGetAttribute,
|
||||||
@ -84,8 +91,12 @@ cuda_base::cuda_function_declarations!(
|
|||||||
cuGetProcAddress_v2,
|
cuGetProcAddress_v2,
|
||||||
cuInit,
|
cuInit,
|
||||||
cuLibraryLoadData,
|
cuLibraryLoadData,
|
||||||
|
cuLibraryGetModule,
|
||||||
|
cuLibraryUnload,
|
||||||
cuMemAlloc_v2,
|
cuMemAlloc_v2,
|
||||||
cuMemFree_v2,
|
cuMemFree_v2,
|
||||||
|
cuMemHostAlloc,
|
||||||
|
cuMemFreeHost,
|
||||||
cuMemGetAddressRange_v2,
|
cuMemGetAddressRange_v2,
|
||||||
cuMemGetInfo_v2,
|
cuMemGetInfo_v2,
|
||||||
cuMemcpyDtoH_v2,
|
cuMemcpyDtoH_v2,
|
||||||
@ -97,7 +108,9 @@ cuda_base::cuda_function_declarations!(
|
|||||||
cuModuleLoadData,
|
cuModuleLoadData,
|
||||||
cuModuleUnload,
|
cuModuleUnload,
|
||||||
cuPointerGetAttribute,
|
cuPointerGetAttribute,
|
||||||
cuStreamSynchronize
|
cuStreamSynchronize,
|
||||||
|
cuProfilerStart,
|
||||||
|
cuProfilerStop,
|
||||||
],
|
],
|
||||||
implemented_in_function <= [
|
implemented_in_function <= [
|
||||||
cuLaunchKernel,
|
cuLaunchKernel,
|
||||||
|
@ -418,25 +418,25 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
|
|||||||
|
|
||||||
dark_api_fn_redirect_log! {
|
dark_api_fn_redirect_log! {
|
||||||
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301 {
|
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301 {
|
||||||
[0] = context_local_storage_ctor(
|
[0] = context_local_storage_put(
|
||||||
context: cuda_types::cuda::CUcontext,
|
context: cuda_types::cuda::CUcontext,
|
||||||
manager: *mut std::ffi::c_void, // ContextStateManager
|
key: *mut std::ffi::c_void,
|
||||||
ctx_state: *mut std::ffi::c_void, // ContextState
|
value: *mut std::ffi::c_void,
|
||||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||||
dtor_cb: Option<extern "system" fn(
|
dtor_cb: Option<extern "system" fn(
|
||||||
cuda_types::cuda::CUcontext,
|
cuda_types::cuda::CUcontext,
|
||||||
*mut std::ffi::c_void, // ContextStateManager
|
*mut std::ffi::c_void,
|
||||||
*mut std::ffi::c_void, // ContextState
|
*mut std::ffi::c_void,
|
||||||
)>
|
)>
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
[1] = context_local_storage_dtor(
|
[1] = context_local_storage_delete(
|
||||||
arg1: *mut std::ffi::c_void,
|
context: cuda_types::cuda::CUcontext,
|
||||||
arg2: *mut std::ffi::c_void
|
key: *mut std::ffi::c_void
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
[2] = context_local_storage_get_state(
|
[2] = context_local_storage_get(
|
||||||
ctx_state: *mut std::ffi::c_void, // ContextState
|
value: *mut *mut std::ffi::c_void,
|
||||||
cu_ctx: cuda_types::cuda::CUcontext,
|
cu_ctx: cuda_types::cuda::CUcontext,
|
||||||
manager: *mut std::ffi::c_void // ContextStateManager
|
key: *mut std::ffi::c_void
|
||||||
) -> cuda_types::cuda::CUresult
|
) -> cuda_types::cuda::CUresult
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user