mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 00:19:20 +03:00
Enable Geekbench 5 (#304)
This commit is contained in:
@ -27,6 +27,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter
|
||||
libgl-dev libegl-dev libvdpau-dev \
|
||||
nvidia-utils-${CUDA_DRIVER} \
|
||||
cuda-cudart-dev-${CUDA_PKG_VERSION} \
|
||||
cuda-nvml-dev-${CUDA_PKG_VERSION} \
|
||||
cuda-cudart-${CUDA_PKG_VERSION} \
|
||||
cuda-profiler-api-${CUDA_PKG_VERSION} \
|
||||
cuda-nvcc-${CUDA_PKG_VERSION}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use amd_comgr_sys::*;
|
||||
use std::{ffi::CStr, iter, mem, ptr};
|
||||
use std::{ffi::CStr, mem, ptr};
|
||||
|
||||
struct Data(amd_comgr_data_t);
|
||||
|
||||
@ -137,7 +137,8 @@ pub fn compile_bitcode(
|
||||
link_with_device_libs_info.set_isa_name(gcn_arch)?;
|
||||
link_with_device_libs_info.set_language(amd_comgr_language_t::AMD_COMGR_LANGUAGE_LLVM_IR)?;
|
||||
// This makes no sense, but it makes ockl linking work
|
||||
link_with_device_libs_info.set_options([c"-Xclang", c"-mno-link-builtin-bitcode-postopt"].into_iter())?;
|
||||
link_with_device_libs_info
|
||||
.set_options([c"-Xclang", c"-mno-link-builtin-bitcode-postopt"].into_iter())?;
|
||||
let with_device_libs = do_action(
|
||||
&linked_data_set,
|
||||
&link_with_device_libs_info,
|
||||
@ -145,7 +146,20 @@ pub fn compile_bitcode(
|
||||
)?;
|
||||
let compile_action_info = ActionInfo::new()?;
|
||||
compile_action_info.set_isa_name(gcn_arch)?;
|
||||
compile_action_info.set_options(iter::once(c"-O3"))?;
|
||||
let common_options = [c"-O3", c"-mno-wavefrontsize64", c"-mcumode"].into_iter();
|
||||
let opt_options = if cfg!(debug_assertions) {
|
||||
[c"-g", c"", c"", c"", c""]
|
||||
} else {
|
||||
[
|
||||
c"-g0",
|
||||
// default inlining threshold times 10
|
||||
c"-mllvm",
|
||||
c"-inline-threshold=2250",
|
||||
c"-mllvm",
|
||||
c"-inlinehint-threshold=3250",
|
||||
]
|
||||
};
|
||||
compile_action_info.set_options(common_options.chain(opt_options))?;
|
||||
let reloc_data_set = do_action(
|
||||
&with_device_libs,
|
||||
&compile_action_info,
|
||||
|
@ -6,7 +6,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
quote = "1.0"
|
||||
syn = { version = "2.0", features = ["full", "visit-mut"] }
|
||||
syn = { version = "2.0", features = ["full", "visit-mut", "extra-traits"] }
|
||||
proc-macro2 = "1.0"
|
||||
rustc-hash = "1.1.0"
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -14,6 +14,7 @@ use syn::{
|
||||
};
|
||||
|
||||
const CUDA_RS: &'static str = include_str! {"cuda.rs"};
|
||||
const NVML_RS: &'static str = include_str! {"nvml.rs"};
|
||||
|
||||
// This macro accepts following arguments:
|
||||
// * `normal_macro`: ident for a normal macro
|
||||
@ -31,9 +32,13 @@ const CUDA_RS: &'static str = include_str! {"cuda.rs"};
|
||||
// Additionally, it does a fixup of CUDA types so they get prefixed with `type_path`
|
||||
#[proc_macro]
|
||||
pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
function_declarations(tokens, CUDA_RS)
|
||||
}
|
||||
|
||||
fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream {
|
||||
let input = parse_macro_input!(tokens as FnDeclInput);
|
||||
let mut cuda_module = syn::parse_str::<File>(module).unwrap();
|
||||
let mut choose_macro = ChooseMacro::new(input);
|
||||
let mut cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
|
||||
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
|
||||
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
|
||||
extern_
|
||||
@ -68,6 +73,11 @@ pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
}
|
||||
result.into()
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn nvml_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
function_declarations(tokens, NVML_RS)
|
||||
}
|
||||
struct FnDeclInput {
|
||||
normal_macro: Path,
|
||||
overrides: Punctuated<OverrideMacro, Token![,]>,
|
||||
@ -193,6 +203,7 @@ fn join(fn_: Vec<String>, find_module: bool) -> Punctuated<Ident, Token![::]> {
|
||||
"func" => &["function"],
|
||||
"mem" => &["memory"],
|
||||
"memcpy" => &["memory", "copy"],
|
||||
"memset" => &["memory", "set"],
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
7857
cuda_base/src/nvml.rs
Normal file
7857
cuda_base/src/nvml.rs
Normal file
File diff suppressed because it is too large
Load Diff
8110
cuda_types/src/cuda.rs
Normal file
8110
cuda_types/src/cuda.rs
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
4185
cuda_types/src/nvml.rs
Normal file
4185
cuda_types/src/nvml.rs
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -1,8 +1,10 @@
|
||||
// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`:
|
||||
// /opt/rocm/llvm/bin/clang -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
|
||||
// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`
|
||||
// `fdenormal-fp-math=dynamic` is required to make functions eligible for inlining
|
||||
// /opt/rocm/llvm/bin/clang -Xclang -fdenormal-fp-math=dynamic -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <hip/amd_detail/amd_device_functions.h>
|
||||
|
||||
#define FUNC(NAME) __device__ __attribute__((retain)) __zluda_ptx_impl_##NAME
|
||||
|
||||
@ -37,7 +39,7 @@ extern "C"
|
||||
return (uint32_t)__ockl_get_num_groups(member);
|
||||
}
|
||||
|
||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __device__;
|
||||
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
@ -65,7 +67,7 @@ extern "C"
|
||||
return (base >> pos) & ((1UL << len) - 1UL);
|
||||
}
|
||||
|
||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
|
||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __device__;
|
||||
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
@ -120,7 +122,7 @@ extern "C"
|
||||
return (base << (64U - pos - len)) >> (64U - len);
|
||||
}
|
||||
|
||||
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __attribute__((device));
|
||||
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __device__;
|
||||
uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
@ -148,4 +150,20 @@ extern "C"
|
||||
mask = ((1UL << len) - 1UL) << (pos);
|
||||
return (~mask & base) | (mask & (insert << pos));
|
||||
}
|
||||
|
||||
void FUNC(bar_sync)(uint32_t)
|
||||
{
|
||||
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
|
||||
__builtin_amdgcn_s_barrier();
|
||||
}
|
||||
|
||||
void FUNC(__assertfail)(uint64_t message,
|
||||
uint64_t file,
|
||||
uint32_t line,
|
||||
uint64_t function,
|
||||
uint64_t char_size)
|
||||
{
|
||||
(void)char_size;
|
||||
__assert_fail((const char *)message, (const char *)file, line, (const char *)function);
|
||||
}
|
||||
}
|
||||
|
@ -96,10 +96,6 @@ impl Module {
|
||||
let memory_buffer = unsafe { LLVMWriteBitcodeToMemoryBuffer(self.get()) };
|
||||
MemoryBuffer(memory_buffer)
|
||||
}
|
||||
|
||||
fn write_to_stderr(&self) {
|
||||
unsafe { LLVMDumpModule(self.get()) };
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Module {
|
||||
@ -183,7 +179,6 @@ pub(super) fn run<'input>(
|
||||
Directive2::Method(method) => emit_ctx.emit_method(method)?,
|
||||
}
|
||||
}
|
||||
module.write_to_stderr();
|
||||
if let Err(err) = module.verify() {
|
||||
panic!("{:?}", err);
|
||||
}
|
||||
@ -246,6 +241,9 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
||||
.map(|v| get_input_argument_type(self.context, &v.v_type, v.state_space)),
|
||||
)?;
|
||||
fn_ = unsafe { LLVMAddFunction(self.module, name.as_ptr(), fn_type) };
|
||||
self.emit_fn_attribute(fn_, "amdgpu-unsafe-fp-atomics", "true");
|
||||
self.emit_fn_attribute(fn_, "uniform-work-group-size", "true");
|
||||
self.emit_fn_attribute(fn_, "no-trapping-math", "true");
|
||||
}
|
||||
if let ast::MethodName::Func(name) = func_decl.name {
|
||||
self.resolver.register(name, fn_);
|
||||
@ -404,6 +402,19 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
||||
ptx_parser::ScalarType::BF16x2 => todo!(),
|
||||
})
|
||||
}
|
||||
|
||||
fn emit_fn_attribute(&self, llvm_object: LLVMValueRef, key: &str, value: &str) {
|
||||
let attribute = unsafe {
|
||||
LLVMCreateStringAttribute(
|
||||
self.context,
|
||||
key.as_ptr() as _,
|
||||
key.len() as u32,
|
||||
value.as_ptr() as _,
|
||||
value.len() as u32,
|
||||
)
|
||||
};
|
||||
unsafe { LLVMAddAttributeAtIndex(llvm_object, LLVMAttributeFunctionIndex, attribute) };
|
||||
}
|
||||
}
|
||||
|
||||
fn get_input_argument_type(
|
||||
@ -529,7 +540,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||
ast::Instruction::Shl { data, arguments } => self.emit_shl(data, arguments),
|
||||
ast::Instruction::Ret { data } => Ok(self.emit_ret(data)),
|
||||
ast::Instruction::Cvta { data, arguments } => self.emit_cvta(data, arguments),
|
||||
ast::Instruction::Abs { .. } => todo!(),
|
||||
ast::Instruction::Abs { data, arguments } => self.emit_abs(data, arguments),
|
||||
ast::Instruction::Mad { data, arguments } => self.emit_mad(data, arguments),
|
||||
ast::Instruction::Fma { data, arguments } => self.emit_fma(data, arguments),
|
||||
ast::Instruction::Sub { data, arguments } => self.emit_sub(data, arguments),
|
||||
@ -539,7 +550,6 @@ impl<'a> MethodEmitContext<'a> {
|
||||
ast::Instruction::Sqrt { data, arguments } => self.emit_sqrt(data, arguments),
|
||||
ast::Instruction::Rsqrt { data, arguments } => self.emit_rsqrt(data, arguments),
|
||||
ast::Instruction::Selp { data, arguments } => self.emit_selp(data, arguments),
|
||||
ast::Instruction::Bar { .. } => todo!(),
|
||||
ast::Instruction::Atom { data, arguments } => self.emit_atom(data, arguments),
|
||||
ast::Instruction::AtomCas { data, arguments } => self.emit_atom_cas(data, arguments),
|
||||
ast::Instruction::Div { data, arguments } => self.emit_div(data, arguments),
|
||||
@ -559,6 +569,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||
ast::Instruction::Trap {} => todo!(),
|
||||
// replaced by a function call
|
||||
ast::Instruction::Bfe { .. }
|
||||
| ast::Instruction::Bar { .. }
|
||||
| ast::Instruction::Bfi { .. }
|
||||
| ast::Instruction::Activemask { .. } => return Err(error_unreachable()),
|
||||
}
|
||||
@ -1570,8 +1581,12 @@ impl<'a> MethodEmitContext<'a> {
|
||||
Some(LLVMBuildFPToUI),
|
||||
)
|
||||
}
|
||||
ptx_parser::CvtMode::FPFromSigned(_) => todo!(),
|
||||
ptx_parser::CvtMode::FPFromUnsigned(_) => todo!(),
|
||||
ptx_parser::CvtMode::FPFromSigned(_) => {
|
||||
return self.emit_cvt_int_to_float(data.to, arguments, LLVMBuildSIToFP)
|
||||
}
|
||||
ptx_parser::CvtMode::FPFromUnsigned(_) => {
|
||||
return self.emit_cvt_int_to_float(data.to, arguments, LLVMBuildUIToFP)
|
||||
}
|
||||
};
|
||||
let src = self.resolver.value(arguments.src)?;
|
||||
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||
@ -1726,6 +1741,25 @@ impl<'a> MethodEmitContext<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn emit_cvt_int_to_float(
|
||||
&mut self,
|
||||
to: ptx_parser::ScalarType,
|
||||
arguments: ptx_parser::CvtArgs<SpirvWord>,
|
||||
llvm_func: unsafe extern "C" fn(
|
||||
arg1: LLVMBuilderRef,
|
||||
Val: LLVMValueRef,
|
||||
DestTy: LLVMTypeRef,
|
||||
Name: *const i8,
|
||||
) -> LLVMValueRef,
|
||||
) -> Result<(), TranslateError> {
|
||||
let type_ = get_scalar_type(self.context, to);
|
||||
let src = self.resolver.value(arguments.src)?;
|
||||
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||
llvm_func(self.builder, src, type_, dst)
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn emit_rsqrt(
|
||||
&mut self,
|
||||
data: ptx_parser::TypeFtz,
|
||||
@ -1994,7 +2028,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => {
|
||||
return Err(error_todo())
|
||||
}
|
||||
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.maxnum",
|
||||
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.minnum",
|
||||
};
|
||||
let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_()));
|
||||
let llvm_type = get_scalar_type(self.context, data.type_());
|
||||
@ -2021,7 +2055,7 @@ impl<'a> MethodEmitContext<'a> {
|
||||
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => {
|
||||
return Err(error_todo())
|
||||
}
|
||||
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.minnum",
|
||||
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.maxnum",
|
||||
};
|
||||
let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_()));
|
||||
let llvm_type = get_scalar_type(self.context, data.type_());
|
||||
@ -2149,6 +2183,30 @@ impl<'a> MethodEmitContext<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn emit_abs(
|
||||
&mut self,
|
||||
data: ast::TypeFtz,
|
||||
arguments: ptx_parser::AbsArgs<SpirvWord>,
|
||||
) -> Result<(), TranslateError> {
|
||||
let llvm_type = get_scalar_type(self.context, data.type_);
|
||||
let src = self.resolver.value(arguments.src)?;
|
||||
let (prefix, intrinsic_arguments) = if data.type_.kind() == ast::ScalarKind::Float {
|
||||
("llvm.fabs", vec![(src, llvm_type)])
|
||||
} else {
|
||||
let pred = get_scalar_type(self.context, ast::ScalarType::Pred);
|
||||
let zero = unsafe { LLVMConstInt(pred, 0, 0) };
|
||||
("llvm.abs", vec![(src, llvm_type), (zero, pred)])
|
||||
};
|
||||
let llvm_intrinsic = format!("{}.{}\0", prefix, LLVMTypeDisplay(data.type_));
|
||||
self.emit_intrinsic(
|
||||
unsafe { CStr::from_bytes_with_nul_unchecked(llvm_intrinsic.as_bytes()) },
|
||||
Some(arguments.dst),
|
||||
&data.type_.into(),
|
||||
intrinsic_arguments,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/*
|
||||
// Currently unused, LLVM 18 (ROCm 6.2) does not support `llvm.set.rounding`
|
||||
// Should be available in LLVM 19
|
||||
|
@ -122,6 +122,13 @@ fn run_statement<'a, 'input>(
|
||||
result.push(Statement::Instruction(instruction));
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
Statement::PtrAccess(ptr_access) => {
|
||||
let statement = Statement::PtrAccess(visitor.visit_ptr_access(ptr_access)?);
|
||||
let statement = statement.visit_map(visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
result.push(statement);
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
s => {
|
||||
let new_statement = s.visit_map(visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
@ -259,6 +266,41 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
|
||||
Ok(ast::Instruction::Ld { data, arguments })
|
||||
}
|
||||
|
||||
fn visit_ptr_access(
|
||||
&mut self,
|
||||
ptr_access: PtrAccess<SpirvWord>,
|
||||
) -> Result<PtrAccess<SpirvWord>, TranslateError> {
|
||||
let (old_space, new_space, name) = match self.variables.get(&ptr_access.ptr_src) {
|
||||
Some(RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space,
|
||||
name,
|
||||
}) => (*old_space, *new_space, *name),
|
||||
Some(RemapAction::PreLdPostSt { .. }) | None => return Ok(ptr_access),
|
||||
};
|
||||
if ptr_access.state_space != old_space {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
// Propagate space changes in dst
|
||||
let new_dst = self
|
||||
.resolver
|
||||
.register_unnamed(Some((ptr_access.underlying_type.clone(), new_space)));
|
||||
self.variables.insert(
|
||||
ptr_access.dst,
|
||||
RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space,
|
||||
name: new_dst,
|
||||
},
|
||||
);
|
||||
Ok(PtrAccess {
|
||||
ptr_src: name,
|
||||
dst: new_dst,
|
||||
state_space: new_space,
|
||||
..ptr_access
|
||||
})
|
||||
}
|
||||
|
||||
fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> {
|
||||
let old_space = match var.state_space {
|
||||
space @ (ptx_parser::StateSpace::Reg | ptx_parser::StateSpace::Param) => space,
|
||||
|
@ -22,6 +22,7 @@ mod normalize_identifiers2;
|
||||
mod normalize_predicates2;
|
||||
mod replace_instructions_with_function_calls;
|
||||
mod resolve_function_pointers;
|
||||
mod replace_known_functions;
|
||||
|
||||
static ZLUDA_PTX_IMPL: &'static [u8] = include_bytes!("../../lib/zluda_ptx_impl.bc");
|
||||
const ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl_";
|
||||
@ -42,9 +43,10 @@ pub fn to_llvm_module<'input>(ast: ast::Module<'input>) -> Result<Module, Transl
|
||||
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
||||
let sreg_map = SpecialRegistersMap2::new(&mut scoped_resolver)?;
|
||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, ast.directives)?;
|
||||
let directives = replace_known_functions::run(&flat_resolver, directives);
|
||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives)?;
|
||||
let directives = resolve_function_pointers::run(directives)?;
|
||||
let directives = fix_special_registers2::run(&mut flat_resolver, &sreg_map, directives)?;
|
||||
let directives: Vec<Directive2<'_, ptx_parser::Instruction<ptx_parser::ParsedOperand<SpirvWord>>, ptx_parser::ParsedOperand<SpirvWord>>> = fix_special_registers2::run(&mut flat_resolver, &sreg_map, directives)?;
|
||||
let directives = expand_operands::run(&mut flat_resolver, directives)?;
|
||||
let directives = deparamize_functions::run(&mut flat_resolver, directives)?;
|
||||
let directives = insert_explicit_load_store::run(&mut flat_resolver, directives)?;
|
||||
|
@ -104,6 +104,9 @@ fn run_instruction<'input>(
|
||||
let name = ["bfi_", scalar_to_ptx_name(data)].concat();
|
||||
to_call(resolver, fn_declarations, name.into(), i)?
|
||||
}
|
||||
i @ ptx_parser::Instruction::Bar { .. } => {
|
||||
to_call(resolver, fn_declarations, "bar_sync".into(), i)?
|
||||
}
|
||||
i => i,
|
||||
})
|
||||
}
|
||||
|
38
ptx/src/pass/replace_known_functions.rs
Normal file
38
ptx/src/pass/replace_known_functions.rs
Normal file
@ -0,0 +1,38 @@
|
||||
use super::{GlobalStringIdentResolver2, NormalizedDirective2, SpirvWord};
|
||||
|
||||
pub(crate) fn run<'input>(
|
||||
resolver: &GlobalStringIdentResolver2<'input>,
|
||||
mut directives: Vec<NormalizedDirective2<'input>>,
|
||||
) -> Vec<NormalizedDirective2<'input>> {
|
||||
for directive in directives.iter_mut() {
|
||||
match directive {
|
||||
NormalizedDirective2::Method(func) => {
|
||||
func.import_as =
|
||||
replace_with_ptx_impl(resolver, &func.func_decl.name, func.import_as.take());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
directives
|
||||
}
|
||||
|
||||
fn replace_with_ptx_impl<'input>(
|
||||
resolver: &GlobalStringIdentResolver2<'input>,
|
||||
fn_name: &ptx_parser::MethodName<'input, SpirvWord>,
|
||||
name: Option<String>,
|
||||
) -> Option<String> {
|
||||
let known_names = ["__assertfail"];
|
||||
match name {
|
||||
Some(name) if known_names.contains(&&*name) => Some(format!("__zluda_ptx_impl_{}", name)),
|
||||
Some(name) => Some(name),
|
||||
None => match fn_name {
|
||||
ptx_parser::MethodName::Func(name) => match resolver.ident_map.get(name) {
|
||||
Some(super::IdentEntry {
|
||||
name: Some(name), ..
|
||||
}) => Some(format!("__zluda_ptx_impl_{}", name)),
|
||||
_ => None,
|
||||
},
|
||||
ptx_parser::MethodName::Kernel(..) => None,
|
||||
},
|
||||
}
|
||||
}
|
@ -298,7 +298,7 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
|
||||
let mut result = vec![0u8.into(); output.len()];
|
||||
{
|
||||
let dev = 0;
|
||||
let mut stream = ptr::null_mut();
|
||||
let mut stream = unsafe { mem::zeroed() };
|
||||
unsafe { hipStreamCreate(&mut stream) }.unwrap();
|
||||
let mut dev_props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut dev_props, dev) }.unwrap();
|
||||
@ -308,9 +308,9 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
|
||||
module.linked_bitcode(),
|
||||
)
|
||||
.unwrap();
|
||||
let mut module = ptr::null_mut();
|
||||
let mut module = unsafe { mem::zeroed() };
|
||||
unsafe { hipModuleLoadData(&mut module, elf_module.as_ptr() as _) }.unwrap();
|
||||
let mut kernel = ptr::null_mut();
|
||||
let mut kernel = unsafe { mem::zeroed() };
|
||||
unsafe { hipModuleGetFunction(&mut kernel, module, name.as_ptr()) }.unwrap();
|
||||
let mut inp_b = ptr::null_mut();
|
||||
unsafe { hipMalloc(&mut inp_b, input.len() * mem::size_of::<Input>()) }.unwrap();
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::{driver, FromCuda, ZludaObject};
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use rustc_hash::FxHashSet;
|
||||
use std::{cell::RefCell, ptr, sync::Mutex};
|
||||
|
@ -1,4 +1,4 @@
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{mem, ptr};
|
||||
|
||||
@ -70,6 +70,16 @@ pub(crate) fn get_attribute(
|
||||
attrib: CUdevice_attribute,
|
||||
dev_idx: hipDevice_t,
|
||||
) -> hipError_t {
|
||||
fn get_device_prop(
|
||||
pi: &mut i32,
|
||||
dev_idx: hipDevice_t,
|
||||
f: impl FnOnce(&hipDeviceProp_tR0600) -> i32,
|
||||
) -> hipError_t {
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut props, dev_idx)? };
|
||||
*pi = f(&props);
|
||||
Ok(())
|
||||
}
|
||||
match attrib {
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => {
|
||||
*pi = 32;
|
||||
@ -79,6 +89,110 @@ pub(crate) fn get_attribute(
|
||||
*pi = 0;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLayered[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLayered[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLayered[2])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DLayered[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DLayered[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER => {
|
||||
return get_device_prop(pi, dev_idx, |props| {
|
||||
(props.maxTexture2DGather[0] > 0 && props.maxTexture2DGather[1] > 0) as i32
|
||||
})
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DGather[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DGather[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture3DAlt[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture3DAlt[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture3DAlt[2])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTextureCubemap)
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTextureCubemapLayered[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTextureCubemapLayered[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface1D)
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface2D[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface2D[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface3D[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface3D[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface3D[2])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface1DLayered[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface1DLayered[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface2DLayered[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface2DLayered[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurface2DLayered[2])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurfaceCubemap)
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurfaceCubemapLayered[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxSurfaceCubemapLayered[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DLinear)
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLinear[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLinear[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLinear[2])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DMipmap[0])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DMipmap[1])
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
|
||||
*pi = COMPUTE_CAPABILITY_MAJOR;
|
||||
return Ok(());
|
||||
@ -87,6 +201,9 @@ pub(crate) fn get_attribute(
|
||||
*pi = COMPUTE_CAPABILITY_MINOR;
|
||||
return Ok(());
|
||||
}
|
||||
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
|
||||
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DMipmap)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let attrib = remap_attribute! {
|
||||
@ -260,7 +377,7 @@ pub(crate) fn get_name(
|
||||
name: *mut ::core::ffi::c_char,
|
||||
len: ::core::ffi::c_int,
|
||||
dev: hipDevice_t,
|
||||
) -> cuda_types::CUresult {
|
||||
) -> CUresult {
|
||||
unsafe { hipDeviceGetName(name, len, dev) }?;
|
||||
let len = len as usize;
|
||||
let buffer = unsafe { std::slice::from_raw_parts(name, len) };
|
||||
@ -287,7 +404,7 @@ pub(crate) fn total_mem_v2(bytes: *mut usize, dev: hipDevice_t) -> hipError_t {
|
||||
unsafe { hipDeviceTotalMem(bytes, dev) }
|
||||
}
|
||||
|
||||
pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) -> hipError_t {
|
||||
pub(crate) fn get_properties(prop: &mut CUdevprop, dev: hipDevice_t) -> hipError_t {
|
||||
let mut hip_props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut hip_props, dev) }?;
|
||||
prop.maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
|
||||
|
@ -1,4 +1,4 @@
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{
|
||||
ffi::{CStr, CString},
|
||||
@ -74,6 +74,6 @@ pub(crate) fn init(flags: ::core::ffi::c_uint) -> CUresult {
|
||||
}
|
||||
|
||||
pub(crate) fn get_version(version: &mut ::core::ffi::c_int) -> CUresult {
|
||||
*version = cuda_types::CUDA_VERSION as i32;
|
||||
*version = cuda_types::cuda::CUDA_VERSION as i32;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
use hip_runtime_sys::*;
|
||||
use std::mem;
|
||||
|
||||
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
||||
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
||||
@ -33,3 +34,11 @@ pub(crate) fn get_address_range_v2(
|
||||
) -> hipError_t {
|
||||
unsafe { hipMemGetAddressRange(pbase, psize, dptr) }
|
||||
}
|
||||
|
||||
pub(crate) fn set_d32_v2(dst: hipDeviceptr_t, ui: ::core::ffi::c_uint, n: usize) -> hipError_t {
|
||||
unsafe { hipMemsetD32(dst, mem::transmute(ui), n) }
|
||||
}
|
||||
|
||||
pub(crate) fn set_d8_v2(dst: hipDeviceptr_t, value: ::core::ffi::c_uchar, n: usize) -> hipError_t {
|
||||
unsafe { hipMemsetD8(dst, value, n) }
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::mem::{self, ManuallyDrop, MaybeUninit};
|
||||
|
||||
@ -107,10 +107,11 @@ from_cuda_nop!(
|
||||
*const ::core::ffi::c_char,
|
||||
*mut ::core::ffi::c_void,
|
||||
*mut *mut ::core::ffi::c_void,
|
||||
u8,
|
||||
i32,
|
||||
u32,
|
||||
usize,
|
||||
cuda_types::CUdevprop,
|
||||
cuda_types::cuda::CUdevprop,
|
||||
CUdevice_attribute
|
||||
);
|
||||
from_cuda_transmute!(
|
||||
@ -136,7 +137,7 @@ impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
|
||||
|
||||
pub(crate) trait ZludaObject: Sized + Send + Sync {
|
||||
const COOKIE: usize;
|
||||
const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE;
|
||||
const LIVENESS_FAIL: CUerror = cuda_types::cuda::CUerror::INVALID_VALUE;
|
||||
|
||||
type CudaHandle: Sized;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
use super::ZludaObject;
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{ffi::CStr, mem};
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{ffi::c_void, ptr};
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
pub(crate) mod r#impl;
|
||||
|
||||
macro_rules! unimplemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
@ -14,7 +14,7 @@ macro_rules! unimplemented {
|
||||
}
|
||||
|
||||
macro_rules! implemented {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
@ -28,7 +28,7 @@ macro_rules! implemented {
|
||||
}
|
||||
|
||||
macro_rules! implemented_in_function {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[cfg_attr(not(test), no_mangle)]
|
||||
#[allow(improper_ctypes)]
|
||||
@ -72,6 +72,8 @@ cuda_base::cuda_function_declarations!(
|
||||
cuModuleUnload,
|
||||
cuPointerGetAttribute,
|
||||
cuMemGetAddressRange_v2,
|
||||
cuMemsetD32_v2,
|
||||
cuMemsetD8_v2
|
||||
],
|
||||
implemented_in_function <= [
|
||||
cuLaunchKernel,
|
||||
|
@ -5,7 +5,7 @@ use std::{collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::
|
||||
use syn::{
|
||||
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
|
||||
ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path,
|
||||
PathArguments, Signature, Type, TypePath, UseTree,
|
||||
PathArguments, Signature, Type, TypePath, UseTree, PathSegment
|
||||
};
|
||||
|
||||
fn main() {
|
||||
@ -14,6 +14,11 @@ fn main() {
|
||||
&crate_root,
|
||||
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
|
||||
);
|
||||
generate_ml(&crate_root);
|
||||
generate_cuda(&crate_root);
|
||||
}
|
||||
|
||||
fn generate_cuda(crate_root: &PathBuf) {
|
||||
let cuda_header = bindgen::Builder::default()
|
||||
.use_core()
|
||||
.rust_target(bindgen::RustTarget::Stable_1_77)
|
||||
@ -42,16 +47,91 @@ fn main() {
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let module: syn::File = syn::parse_str(&cuda_header).unwrap();
|
||||
generate_functions(&crate_root, &["..", "cuda_base", "src", "cuda.rs"], &module);
|
||||
generate_types(&crate_root, &["..", "cuda_types", "src", "lib.rs"], &module);
|
||||
generate_functions(
|
||||
&crate_root,
|
||||
"cuda",
|
||||
&["..", "cuda_base", "src", "cuda.rs"],
|
||||
&module,
|
||||
);
|
||||
generate_types_cuda(
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "cuda.rs"],
|
||||
&module,
|
||||
);
|
||||
generate_display(
|
||||
&crate_root,
|
||||
&["..", "zluda_dump", "src", "format_generated.rs"],
|
||||
"cuda_types",
|
||||
&["cuda_types", "cuda"],
|
||||
&module,
|
||||
)
|
||||
}
|
||||
|
||||
fn generate_ml(crate_root: &PathBuf) {
|
||||
let ml_header = bindgen::Builder::default()
|
||||
.use_core()
|
||||
.rust_target(bindgen::RustTarget::Stable_1_77)
|
||||
.layout_tests(false)
|
||||
.default_enum_style(bindgen::EnumVariation::NewType {
|
||||
is_bitfield: false,
|
||||
is_global: false,
|
||||
})
|
||||
.derive_hash(true)
|
||||
.derive_eq(true)
|
||||
.header("/usr/local/cuda/include/nvml.h")
|
||||
.allowlist_type("^nvml.*")
|
||||
.allowlist_function("^nvml.*")
|
||||
.allowlist_var("^NVML.*")
|
||||
.must_use_type("nvmlReturn_t")
|
||||
.constified_enum("nvmlReturn_enum")
|
||||
.generate()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let mut module: syn::File = syn::parse_str(&ml_header).unwrap();
|
||||
let mut converter = ConvertIntoRustResult {
|
||||
type_: "nvmlReturn_t",
|
||||
underlying_type: "nvmlReturn_enum",
|
||||
new_error_type: "nvmlError_t",
|
||||
error_prefix: ("NVML_ERROR_", "ERROR_"),
|
||||
success: ("NVML_SUCCESS", "SUCCESS"),
|
||||
constants: Vec::new(),
|
||||
};
|
||||
module.items = module
|
||||
.items
|
||||
.into_iter()
|
||||
.filter_map(|item| match item {
|
||||
Item::Const(const_) => converter.get_const(const_).map(Item::Const),
|
||||
Item::Use(use_) => converter.get_use(use_).map(Item::Use),
|
||||
Item::Type(type_) => converter.get_type(type_).map(Item::Type),
|
||||
item => Some(item),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
converter.flush(&mut module.items);
|
||||
generate_functions(
|
||||
&crate_root,
|
||||
"nvml",
|
||||
&["..", "cuda_base", "src", "nvml.rs"],
|
||||
&module,
|
||||
);
|
||||
generate_types(
|
||||
&crate_root,
|
||||
&["..", "cuda_types", "src", "nvml.rs"],
|
||||
&module,
|
||||
);
|
||||
}
|
||||
|
||||
fn generate_types(crate_root: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
let non_fn = module.items.iter().filter_map(|item| match item {
|
||||
Item::ForeignMod(_) => None,
|
||||
_ => Some(item),
|
||||
});
|
||||
let module: syn::File = parse_quote! {
|
||||
#(#non_fn)*
|
||||
};
|
||||
let mut output = crate_root.clone();
|
||||
output.extend(path);
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
}
|
||||
|
||||
fn generate_hip_runtime(output: &PathBuf, path: &[&str]) {
|
||||
let hiprt_header = bindgen::Builder::default()
|
||||
.use_core()
|
||||
@ -125,7 +205,7 @@ fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
fn generate_functions(output: &PathBuf, submodule: &str, path: &[&str], module: &syn::File) {
|
||||
let fns_ = module.items.iter().filter_map(|item| match item {
|
||||
Item::ForeignMod(extern_) => match &*extern_.items {
|
||||
[ForeignItem::Fn(fn_)] => Some(fn_),
|
||||
@ -138,7 +218,8 @@ fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
#(#fns_)*
|
||||
}
|
||||
};
|
||||
syn::visit_mut::visit_file_mut(&mut PrependCudaPath, &mut module);
|
||||
let submodule = Ident::new(submodule, Span::call_site());
|
||||
syn::visit_mut::visit_file_mut(&mut PrependCudaPath { module: submodule }, &mut module);
|
||||
syn::visit_mut::visit_file_mut(&mut RemoveVisibility, &mut module);
|
||||
syn::visit_mut::visit_file_mut(&mut ExplicitReturnType, &mut module);
|
||||
let mut output = output.clone();
|
||||
@ -146,7 +227,7 @@ fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
write_rust_to_file(output, &prettyplease::unparse(&module))
|
||||
}
|
||||
|
||||
fn generate_types(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
fn generate_types_cuda(output: &PathBuf, path: &[&str], module: &syn::File) {
|
||||
let mut module = module.clone();
|
||||
let mut converter = ConvertIntoRustResult {
|
||||
type_: "CUresult",
|
||||
@ -314,7 +395,9 @@ impl VisitMut for FixAbi {
|
||||
}
|
||||
}
|
||||
|
||||
struct PrependCudaPath;
|
||||
struct PrependCudaPath {
|
||||
module: Ident,
|
||||
}
|
||||
|
||||
impl VisitMut for PrependCudaPath {
|
||||
fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
|
||||
@ -322,7 +405,8 @@ impl VisitMut for PrependCudaPath {
|
||||
match &*type_.path.segments[0].ident.to_string() {
|
||||
"usize" | "f64" | "f32" => {}
|
||||
_ => {
|
||||
*type_ = parse_quote! { cuda_types :: #type_ };
|
||||
let module = &self.module;
|
||||
*type_ = parse_quote! { cuda_types :: #module :: #type_ };
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -350,7 +434,7 @@ impl VisitMut for ExplicitReturnType {
|
||||
fn generate_display(
|
||||
output: &PathBuf,
|
||||
path: &[&str],
|
||||
types_crate: &'static str,
|
||||
types_crate: &[&'static str],
|
||||
module: &syn::File,
|
||||
) {
|
||||
let ignore_types = [
|
||||
@ -419,7 +503,7 @@ fn generate_display(
|
||||
}
|
||||
|
||||
struct DeriveDisplayState<'a> {
|
||||
types_crate: &'static str,
|
||||
types_crate: Path,
|
||||
ignore_types: FxHashSet<Ident>,
|
||||
ignore_fns: FxHashSet<Ident>,
|
||||
enums: FxHashMap<&'a Ident, Vec<&'a Ident>>,
|
||||
@ -430,12 +514,22 @@ struct DeriveDisplayState<'a> {
|
||||
impl<'a> DeriveDisplayState<'a> {
|
||||
fn new(
|
||||
ignore_types: &[&'static str],
|
||||
types_crate: &'static str,
|
||||
types_crate: &[&'static str],
|
||||
ignore_fns: &[&'static str],
|
||||
count_selectors: &[(&'static str, usize, usize)],
|
||||
) -> Self {
|
||||
let segments = types_crate
|
||||
.iter()
|
||||
.map(|seg| PathSegment {
|
||||
ident: Ident::new(seg, Span::call_site()),
|
||||
arguments: PathArguments::None,
|
||||
})
|
||||
.collect::<Punctuated<_, _>>();
|
||||
DeriveDisplayState {
|
||||
types_crate,
|
||||
types_crate: Path {
|
||||
leading_colon: None,
|
||||
segments,
|
||||
},
|
||||
ignore_types: ignore_types
|
||||
.into_iter()
|
||||
.map(|x| Ident::new(x, Span::call_site()))
|
||||
@ -469,8 +563,11 @@ fn cuda_derive_display_trait_for_item<'a>(
|
||||
state: &mut DeriveDisplayState<'a>,
|
||||
item: &'a Item,
|
||||
) -> Option<syn::Item> {
|
||||
let path_prefix = Path::from(Ident::new(state.types_crate, Span::call_site()));
|
||||
let path_prefix = & state.types_crate;
|
||||
let path_prefix_iter = iter::repeat(&path_prefix);
|
||||
let mut prepend_path = PrependCudaPath {
|
||||
module: Ident::new("cuda", Span::call_site()),
|
||||
};
|
||||
match item {
|
||||
Item::Const(const_) => {
|
||||
if const_.ty.to_token_stream().to_string() == "cudaError_enum" {
|
||||
@ -490,7 +587,7 @@ fn cuda_derive_display_trait_for_item<'a>(
|
||||
.iter()
|
||||
.map(|fn_arg| {
|
||||
let mut fn_arg = fn_arg.clone();
|
||||
syn::visit_mut::visit_fn_arg_mut(&mut PrependCudaPath, &mut fn_arg);
|
||||
syn::visit_mut::visit_fn_arg_mut(&mut prepend_path, &mut fn_arg);
|
||||
fn_arg
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
@ -686,7 +783,7 @@ fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
|
||||
})
|
||||
});
|
||||
parse_quote! {
|
||||
impl crate::format::CudaDisplay for cuda_types::CUresult {
|
||||
impl crate::format::CudaDisplay for cuda_types::cuda::CUresult {
|
||||
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
|
||||
match self {
|
||||
Ok(()) => writer.write_all(b"CUDA_SUCCESS"),
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::format;
|
||||
use crate::{log, os, trace::StateTracker};
|
||||
use crate::{log::UInt, GlobalDelayedState};
|
||||
use cuda_types::{CUmodule, CUresult, CUuuid};
|
||||
use cuda_types::cuda::*;
|
||||
use std::borrow::Cow;
|
||||
use std::hash::Hash;
|
||||
use std::{
|
||||
|
@ -1,4 +1,4 @@
|
||||
use cuda_types::{CUGLDeviceList, CUdevice};
|
||||
use cuda_types::cuda::*;
|
||||
use std::{
|
||||
ffi::{c_void, CStr},
|
||||
fmt::LowerHex,
|
||||
@ -14,7 +14,7 @@ pub(crate) trait CudaDisplay {
|
||||
) -> std::io::Result<()>;
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUuuid {
|
||||
impl CudaDisplay for CUuuid {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -26,7 +26,7 @@ impl CudaDisplay for cuda_types::CUuuid {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUdeviceptr_v1 {
|
||||
impl CudaDisplay for CUdeviceptr_v1 {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -125,7 +125,7 @@ pub fn write_handle<T: LowerHex>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUipcMemHandle {
|
||||
impl CudaDisplay for CUipcMemHandle {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -136,7 +136,7 @@ impl CudaDisplay for cuda_types::CUipcMemHandle {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUipcEventHandle {
|
||||
impl CudaDisplay for CUipcEventHandle {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -147,7 +147,7 @@ impl CudaDisplay for cuda_types::CUipcEventHandle {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUmemPoolPtrExportData_v1 {
|
||||
impl CudaDisplay for CUmemPoolPtrExportData_v1 {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -223,7 +223,7 @@ impl CudaDisplay for *mut i8 {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUstreamBatchMemOpParams {
|
||||
impl CudaDisplay for CUstreamBatchMemOpParams {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -236,15 +236,15 @@ impl CudaDisplay for cuda_types::CUstreamBatchMemOpParams {
|
||||
// distinct operations with nominally distinct union variants, but
|
||||
// in reality they are structurally different, so we take a little
|
||||
// shortcut here
|
||||
cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_32
|
||||
| cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_32 => {
|
||||
CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_32
|
||||
| CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_32 => {
|
||||
write_wait_value(&self.waitValue, writer, false)
|
||||
}
|
||||
cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_64
|
||||
| cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_64 => {
|
||||
CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_64
|
||||
| CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_64 => {
|
||||
write_wait_value(&self.waitValue, writer, true)
|
||||
}
|
||||
cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES => {
|
||||
CUstreamBatchMemOpType::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES => {
|
||||
CudaDisplay::write(&self.flushRemoteWrites, "", 0, writer)
|
||||
}
|
||||
_ => {
|
||||
@ -258,7 +258,7 @@ impl CudaDisplay for cuda_types::CUstreamBatchMemOpParams {
|
||||
}
|
||||
|
||||
pub fn write_wait_value(
|
||||
this: &cuda_types::CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
|
||||
this: &CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
is_64_bit: bool,
|
||||
) -> std::io::Result<()> {
|
||||
@ -275,7 +275,7 @@ pub fn write_wait_value(
|
||||
}
|
||||
|
||||
pub fn write_wait_value_32_or_64(
|
||||
this: &cuda_types::CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1,
|
||||
this: &CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
is_64_bit: bool,
|
||||
) -> std::io::Result<()> {
|
||||
@ -288,7 +288,7 @@ pub fn write_wait_value_32_or_64(
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUDA_RESOURCE_DESC_st {
|
||||
impl CudaDisplay for CUDA_RESOURCE_DESC_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -298,28 +298,28 @@ impl CudaDisplay for cuda_types::CUDA_RESOURCE_DESC_st {
|
||||
writer.write_all(b"{ resType: ")?;
|
||||
CudaDisplay::write(&self.resType, "", 0, writer)?;
|
||||
match self.resType {
|
||||
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_ARRAY => {
|
||||
CUresourcetype::CU_RESOURCE_TYPE_ARRAY => {
|
||||
writer.write_all(b", res: ")?;
|
||||
CudaDisplay::write(unsafe { &self.res.array }, "", 0, writer)?;
|
||||
writer.write_all(b", flags: ")?;
|
||||
CudaDisplay::write(&self.flags, "", 0, writer)?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY => {
|
||||
CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY => {
|
||||
writer.write_all(b", res: ")?;
|
||||
CudaDisplay::write(unsafe { &self.res.mipmap }, "", 0, writer)?;
|
||||
writer.write_all(b", flags: ")?;
|
||||
CudaDisplay::write(&self.flags, "", 0, writer)?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_LINEAR => {
|
||||
CUresourcetype::CU_RESOURCE_TYPE_LINEAR => {
|
||||
writer.write_all(b", res: ")?;
|
||||
CudaDisplay::write(unsafe { &self.res.linear }, "", 0, writer)?;
|
||||
writer.write_all(b", flags: ")?;
|
||||
CudaDisplay::write(&self.flags, "", 0, writer)?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => {
|
||||
CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => {
|
||||
writer.write_all(b", res: ")?;
|
||||
CudaDisplay::write(unsafe { &self.res.pitch2D }, "", 0, writer)?;
|
||||
writer.write_all(b", flags: ")?;
|
||||
@ -335,7 +335,7 @@ impl CudaDisplay for cuda_types::CUDA_RESOURCE_DESC_st {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||||
impl CudaDisplay for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -345,22 +345,22 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||||
writer.write_all(b"{ type: ")?;
|
||||
CudaDisplay::write(&self.type_, "", 0, writer)?;
|
||||
match self.type_ {
|
||||
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD => {
|
||||
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD => {
|
||||
writer.write_all(b", handle: ")?;
|
||||
CudaDisplay::write(unsafe { &self.handle.fd }, "", 0,writer)?;
|
||||
}
|
||||
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
|
||||
| cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
|
||||
| cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
|
||||
|cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE => {
|
||||
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
|
||||
| CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
|
||||
| CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
|
||||
|CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE => {
|
||||
write_win32_handle(unsafe { self.handle.win32 }, writer)?;
|
||||
}
|
||||
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
|
||||
| cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT => {
|
||||
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
|
||||
| CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT => {
|
||||
writer.write_all(b", handle: ")?;
|
||||
CudaDisplay::write(unsafe { &self.handle.win32.handle }, "", 0,writer)?;
|
||||
}
|
||||
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF => {
|
||||
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF => {
|
||||
writer.write_all(b", handle: ")?;
|
||||
CudaDisplay::write(unsafe { &self.handle.nvSciBufObject }, "", 0,writer)?;
|
||||
}
|
||||
@ -381,7 +381,7 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||||
}
|
||||
|
||||
pub fn write_win32_handle(
|
||||
win32: cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
|
||||
win32: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
) -> std::io::Result<()> {
|
||||
if win32.handle != ptr::null_mut() {
|
||||
@ -400,7 +400,7 @@ pub fn write_win32_handle(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
|
||||
impl CudaDisplay for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -410,22 +410,22 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
|
||||
writer.write_all(b"{ type: ")?;
|
||||
CudaDisplay::write(&self.type_, "", 0, writer)?;
|
||||
match self.type_ {
|
||||
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD => {
|
||||
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD => {
|
||||
writer.write_all(b", handle: ")?;
|
||||
CudaDisplay::write(unsafe { &self.handle.fd }, "", 0,writer)?;
|
||||
}
|
||||
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
|
||||
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
|
||||
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
|
||||
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
|
||||
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT => {
|
||||
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
|
||||
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
|
||||
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
|
||||
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
|
||||
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT => {
|
||||
write_win32_handle(unsafe { mem::transmute(self.handle.win32) }, writer)?;
|
||||
}
|
||||
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT => {
|
||||
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT => {
|
||||
writer.write_all(b", handle: ")?;
|
||||
CudaDisplay::write(unsafe { &self.handle.win32.handle }, "", 0,writer)?;
|
||||
}
|
||||
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC => {
|
||||
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC => {
|
||||
writer.write_all(b", handle: ")?;
|
||||
CudaDisplay::write(unsafe { &self.handle.nvSciSyncObj }, "", 0,writer)?;
|
||||
}
|
||||
@ -442,7 +442,7 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
|
||||
}
|
||||
|
||||
impl CudaDisplay
|
||||
for cuda_types::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2
|
||||
for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2
|
||||
{
|
||||
fn write(
|
||||
&self,
|
||||
@ -457,7 +457,7 @@ impl CudaDisplay
|
||||
}
|
||||
|
||||
impl CudaDisplay
|
||||
for cuda_types::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2
|
||||
for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2
|
||||
{
|
||||
fn write(
|
||||
&self,
|
||||
@ -471,7 +471,7 @@ impl CudaDisplay
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUgraphNodeParams_st {
|
||||
impl CudaDisplay for CUgraphNodeParams_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -482,7 +482,7 @@ impl CudaDisplay for cuda_types::CUgraphNodeParams_st {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUlaunchConfig_st {
|
||||
impl CudaDisplay for CUlaunchConfig_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -493,7 +493,7 @@ impl CudaDisplay for cuda_types::CUlaunchConfig_st {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUeglFrame_st {
|
||||
impl CudaDisplay for CUeglFrame_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -504,7 +504,7 @@ impl CudaDisplay for cuda_types::CUeglFrame_st {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUdevResource_st {
|
||||
impl CudaDisplay for CUdevResource_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -514,7 +514,7 @@ impl CudaDisplay for cuda_types::CUdevResource_st {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
impl CudaDisplay for cuda_types::CUlaunchAttribute_st {
|
||||
impl CudaDisplay for CUlaunchAttribute_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -574,7 +574,7 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUarrayMapInfo_st {
|
||||
impl CudaDisplay for CUarrayMapInfo_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -585,7 +585,7 @@ impl CudaDisplay for cuda_types::CUarrayMapInfo_st {
|
||||
}
|
||||
}
|
||||
|
||||
impl CudaDisplay for cuda_types::CUexecAffinityParam_st {
|
||||
impl CudaDisplay for CUexecAffinityParam_st {
|
||||
fn write(
|
||||
&self,
|
||||
_fn_name: &'static str,
|
||||
@ -599,9 +599,9 @@ impl CudaDisplay for cuda_types::CUexecAffinityParam_st {
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuGraphKernelNodeGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
hNode: cuda_types::CUgraphNode,
|
||||
attr: cuda_types::CUkernelNodeAttrID,
|
||||
value_out: *mut cuda_types::CUkernelNodeAttrValue,
|
||||
hNode: CUgraphNode,
|
||||
attr: CUkernelNodeAttrID,
|
||||
value_out: *mut CUkernelNodeAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"(hNode: ")?;
|
||||
CudaDisplay::write(&hNode, "cuGraphKernelNodeGetAttribute", 0, writer)?;
|
||||
@ -614,9 +614,9 @@ pub fn write_cuGraphKernelNodeGetAttribute(
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuGraphKernelNodeSetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
hNode: cuda_types::CUgraphNode,
|
||||
attr: cuda_types::CUkernelNodeAttrID,
|
||||
value_out: *const cuda_types::CUkernelNodeAttrValue,
|
||||
hNode: CUgraphNode,
|
||||
attr: CUkernelNodeAttrID,
|
||||
value_out: *const CUkernelNodeAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
write_cuGraphKernelNodeGetAttribute(writer, hNode, attr, value_out as *mut _)
|
||||
}
|
||||
@ -624,9 +624,9 @@ pub fn write_cuGraphKernelNodeSetAttribute(
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuStreamGetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
hStream: cuda_types::CUstream,
|
||||
attr: cuda_types::CUstreamAttrID,
|
||||
value_out: *mut cuda_types::CUstreamAttrValue,
|
||||
hStream: CUstream,
|
||||
attr: CUstreamAttrID,
|
||||
value_out: *mut CUstreamAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
writer.write_all(b"(hStream: ")?;
|
||||
CudaDisplay::write(&hStream, "cuStreamGetAttribute", 0, writer)?;
|
||||
@ -640,11 +640,11 @@ fn write_launch_attribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
fn_name: &'static str,
|
||||
index: usize,
|
||||
attribute: cuda_types::CUlaunchAttributeID,
|
||||
value_out: *mut cuda_types::CUstreamAttrValue,
|
||||
attribute: CUlaunchAttributeID,
|
||||
value_out: *mut CUstreamAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
match attribute {
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(
|
||||
unsafe { &(*value_out).accessPolicyWindow },
|
||||
@ -653,47 +653,47 @@ fn write_launch_attribute(
|
||||
writer,
|
||||
)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).cooperative }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).syncPolicy }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).clusterDim }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).clusterSchedulingPolicyPreference }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).programmaticStreamSerializationAllowed }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).programmaticEvent }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).priority }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).memSyncDomainMap }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).memSyncDomain }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).launchCompletionEvent }, fn_name, index, writer)
|
||||
}
|
||||
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
|
||||
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
|
||||
writer.write_all(b", value_out: ")?;
|
||||
CudaDisplay::write(unsafe { &(*value_out).deviceUpdatableKernelNode }, fn_name, index, writer)
|
||||
}
|
||||
@ -704,9 +704,9 @@ fn write_launch_attribute(
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuStreamGetAttribute_ptsz(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
hStream: cuda_types::CUstream,
|
||||
attr: cuda_types::CUstreamAttrID,
|
||||
value_out: *mut cuda_types::CUstreamAttrValue,
|
||||
hStream: CUstream,
|
||||
attr: CUstreamAttrID,
|
||||
value_out: *mut CUstreamAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
write_cuStreamGetAttribute(writer, hStream, attr, value_out)
|
||||
}
|
||||
@ -714,9 +714,9 @@ pub fn write_cuStreamGetAttribute_ptsz(
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuStreamSetAttribute(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
hStream: cuda_types::CUstream,
|
||||
attr: cuda_types::CUstreamAttrID,
|
||||
value_out: *const cuda_types::CUstreamAttrValue,
|
||||
hStream: CUstream,
|
||||
attr: CUstreamAttrID,
|
||||
value_out: *const CUstreamAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
write_cuStreamGetAttribute(writer, hStream, attr, value_out as *mut _)
|
||||
}
|
||||
@ -724,9 +724,9 @@ pub fn write_cuStreamSetAttribute(
|
||||
#[allow(non_snake_case)]
|
||||
pub fn write_cuStreamSetAttribute_ptsz(
|
||||
writer: &mut (impl std::io::Write + ?Sized),
|
||||
hStream: cuda_types::CUstream,
|
||||
attr: cuda_types::CUstreamAttrID,
|
||||
value_out: *const cuda_types::CUstreamAttrValue,
|
||||
hStream: CUstream,
|
||||
attr: CUstreamAttrID,
|
||||
value_out: *const CUstreamAttrValue,
|
||||
) -> std::io::Result<()> {
|
||||
write_cuStreamSetAttribute(writer, hStream, attr, value_out)
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
use cuda_types::*;
|
||||
use cuda_types::cuda::*;
|
||||
use paste::paste;
|
||||
use side_by_side::CudaDynamicFns;
|
||||
use std::io;
|
||||
@ -9,7 +9,7 @@ extern crate lazy_static;
|
||||
extern crate cuda_types;
|
||||
|
||||
macro_rules! extern_redirect {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
@ -30,7 +30,7 @@ macro_rules! extern_redirect {
|
||||
}
|
||||
|
||||
macro_rules! extern_redirect_with_post {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
|
@ -1,8 +1,5 @@
|
||||
use crate::format;
|
||||
use cuda_types::CUmodule;
|
||||
use cuda_types::CUuuid;
|
||||
|
||||
use super::CUresult;
|
||||
use cuda_types::cuda::*;
|
||||
use super::Settings;
|
||||
use std::error::Error;
|
||||
use std::ffi::c_void;
|
||||
|
@ -1,4 +1,4 @@
|
||||
use cuda_types::CUuuid;
|
||||
use cuda_types::cuda::CUuuid;
|
||||
use std::ffi::{c_void, CStr, CString};
|
||||
use std::mem;
|
||||
|
||||
|
@ -56,7 +56,7 @@ impl CudaDynamicFns {
|
||||
}
|
||||
|
||||
macro_rules! emit_cuda_fn_table {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
#[derive(Default)]
|
||||
#[allow(improper_ctypes)]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::{dark_api, log, Settings};
|
||||
use cuda_types::CUmodule;
|
||||
use cuda_types::cuda::*;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
ffi::{c_void, CStr, CString},
|
||||
|
@ -9,7 +9,7 @@ name = "zluda_with"
|
||||
path = "src/main.rs"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3", features = ["jobapi2", "processthreadsapi", "synchapi", "winbase", "std"] }
|
||||
winapi = { version = "0.3", features = ["jobapi2", "processthreadsapi", "synchapi", "winbase", "std", "processenv"] }
|
||||
tempfile = "3"
|
||||
argh = "0.1"
|
||||
detours-sys = { path = "../detours-sys" }
|
||||
|
@ -7,6 +7,9 @@ use std::{
|
||||
};
|
||||
|
||||
fn main() -> Result<(), VarError> {
|
||||
if std::env::var_os("CARGO_CFG_WINDOWS").is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
if env::var("PROFILE")? != "debug" {
|
||||
return Ok(());
|
||||
|
@ -1,6 +1,6 @@
|
||||
#![crate_type = "bin"]
|
||||
|
||||
#[link(name = "do_cuinit")]
|
||||
#[link(name = "do_cuinit", kind = "raw-dylib")]
|
||||
extern "system" {
|
||||
fn do_cuinit(flags: u32) -> u32;
|
||||
}
|
||||
|
@ -7,3 +7,7 @@ edition = "2021"
|
||||
[lib]
|
||||
name = "nvml"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
cuda_types = { path = "../cuda_types" }
|
||||
|
@ -1,3 +0,0 @@
|
||||
bindgen "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include\nvml.h" --whitelist-function="^nvml.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug -o src/nvml.rs
|
||||
sed -i -e 's/extern "C" {//g' -e 's/-> nvmlReturn_t;/-> nvmlReturn_t { crate::r#impl::unimplemented()/g' -e 's/pub fn /#[no_mangle] pub extern "C" fn /g' src/nvml.rs
|
||||
rustfmt src/nvml.rs
|
@ -1,4 +1,5 @@
|
||||
use crate::nvml::nvmlReturn_t;
|
||||
use cuda_types::nvml::*;
|
||||
use std::{ffi::CStr, ptr};
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
@ -9,3 +10,35 @@ pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
nvmlReturn_t::NVML_ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub(crate) fn nvmlErrorString(
|
||||
_result: cuda_types::nvml::nvmlReturn_t,
|
||||
) -> *const ::core::ffi::c_char {
|
||||
c"".as_ptr()
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub(crate) fn nvmlInit_v2() -> cuda_types::nvml::nvmlReturn_t {
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
||||
const VERSION: &'static CStr = c"550.77";
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub(crate) fn nvmlSystemGetDriverVersion(
|
||||
result: *mut ::core::ffi::c_char,
|
||||
length: ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t {
|
||||
if result == ptr::null_mut() {
|
||||
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
let version = VERSION.to_bytes_with_nul();
|
||||
let copy_length = usize::min(length as usize, version.len());
|
||||
let slice = unsafe { std::slice::from_raw_parts_mut(result.cast(), copy_length) };
|
||||
slice.copy_from_slice(&version[..copy_length]);
|
||||
if let Some(null) = slice.last_mut() {
|
||||
*null = 0;
|
||||
}
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
@ -1,3 +1,34 @@
|
||||
pub mod r#impl;
|
||||
#[allow(warnings)]
|
||||
mod nvml;
|
||||
mod r#impl;
|
||||
|
||||
macro_rules! unimplemented_fn {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
r#impl::unimplemented()
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! implemented_fn {
|
||||
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
|
||||
$(
|
||||
#[no_mangle]
|
||||
#[allow(improper_ctypes_definitions)]
|
||||
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
|
||||
r#impl::$fn_name($($arg_id),*)
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
cuda_base::nvml_function_declarations!(
|
||||
unimplemented_fn,
|
||||
implemented_fn <= [
|
||||
nvmlErrorString,
|
||||
nvmlInit_v2,
|
||||
nvmlSystemGetDriverVersion
|
||||
]
|
||||
);
|
||||
|
3171
zluda_ml/src/nvml.rs
3171
zluda_ml/src/nvml.rs
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user