Enable Geekbench 5 (#304)

This commit is contained in:
Andrzej Janik
2024-12-10 21:48:10 +01:00
committed by GitHub
parent 7a6df9dcbf
commit 7ac67a89e9
42 changed files with 24893 additions and 15443 deletions

View File

@ -27,6 +27,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter
libgl-dev libegl-dev libvdpau-dev \
nvidia-utils-${CUDA_DRIVER} \
cuda-cudart-dev-${CUDA_PKG_VERSION} \
cuda-nvml-dev-${CUDA_PKG_VERSION} \
cuda-cudart-${CUDA_PKG_VERSION} \
cuda-profiler-api-${CUDA_PKG_VERSION} \
cuda-nvcc-${CUDA_PKG_VERSION}

View File

@ -1,5 +1,5 @@
use amd_comgr_sys::*;
use std::{ffi::CStr, iter, mem, ptr};
use std::{ffi::CStr, mem, ptr};
struct Data(amd_comgr_data_t);
@ -137,7 +137,8 @@ pub fn compile_bitcode(
link_with_device_libs_info.set_isa_name(gcn_arch)?;
link_with_device_libs_info.set_language(amd_comgr_language_t::AMD_COMGR_LANGUAGE_LLVM_IR)?;
// This makes no sense, but it makes ockl linking work
link_with_device_libs_info.set_options([c"-Xclang", c"-mno-link-builtin-bitcode-postopt"].into_iter())?;
link_with_device_libs_info
.set_options([c"-Xclang", c"-mno-link-builtin-bitcode-postopt"].into_iter())?;
let with_device_libs = do_action(
&linked_data_set,
&link_with_device_libs_info,
@ -145,7 +146,20 @@ pub fn compile_bitcode(
)?;
let compile_action_info = ActionInfo::new()?;
compile_action_info.set_isa_name(gcn_arch)?;
compile_action_info.set_options(iter::once(c"-O3"))?;
let common_options = [c"-O3", c"-mno-wavefrontsize64", c"-mcumode"].into_iter();
let opt_options = if cfg!(debug_assertions) {
[c"-g", c"", c"", c"", c""]
} else {
[
c"-g0",
// default inlining threshold times 10
c"-mllvm",
c"-inline-threshold=2250",
c"-mllvm",
c"-inlinehint-threshold=3250",
]
};
compile_action_info.set_options(common_options.chain(opt_options))?;
let reloc_data_set = do_action(
&with_device_libs,
&compile_action_info,

View File

@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
quote = "1.0"
syn = { version = "2.0", features = ["full", "visit-mut"] }
syn = { version = "2.0", features = ["full", "visit-mut", "extra-traits"] }
proc-macro2 = "1.0"
rustc-hash = "1.1.0"

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,7 @@ use syn::{
};
const CUDA_RS: &'static str = include_str! {"cuda.rs"};
const NVML_RS: &'static str = include_str! {"nvml.rs"};
// This macro accepts following arguments:
// * `normal_macro`: ident for a normal macro
@ -31,9 +32,13 @@ const CUDA_RS: &'static str = include_str! {"cuda.rs"};
// Additionally, it does a fixup of CUDA types so they get prefixed with `type_path`
#[proc_macro]
pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUDA_RS)
}
fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream {
let input = parse_macro_input!(tokens as FnDeclInput);
let mut cuda_module = syn::parse_str::<File>(module).unwrap();
let mut choose_macro = ChooseMacro::new(input);
let mut cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
extern_
@ -68,6 +73,11 @@ pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
}
result.into()
}
#[proc_macro]
pub fn nvml_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, NVML_RS)
}
struct FnDeclInput {
normal_macro: Path,
overrides: Punctuated<OverrideMacro, Token![,]>,
@ -193,6 +203,7 @@ fn join(fn_: Vec<String>, find_module: bool) -> Punctuated<Ident, Token![::]> {
"func" => &["function"],
"mem" => &["memory"],
"memcpy" => &["memory", "copy"],
"memset" => &["memory", "set"],
_ => return None,
})
}

7857
cuda_base/src/nvml.rs Normal file

File diff suppressed because it is too large Load Diff

8110
cuda_types/src/cuda.rs Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

4185
cuda_types/src/nvml.rs Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -1,8 +1,10 @@
// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`:
// /opt/rocm/llvm/bin/clang -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`
// `fdenormal-fp-math=dynamic` is required to make functions eligible for inlining
// /opt/rocm/llvm/bin/clang -Xclang -fdenormal-fp-math=dynamic -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
#include <cstddef>
#include <cstdint>
#include <hip/amd_detail/amd_device_functions.h>
#define FUNC(NAME) __device__ __attribute__((retain)) __zluda_ptx_impl_##NAME
@ -37,7 +39,7 @@ extern "C"
return (uint32_t)__ockl_get_num_groups(member);
}
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __device__;
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
{
uint32_t pos = pos_32 & 0xFFU;
@ -65,7 +67,7 @@ extern "C"
return (base >> pos) & ((1UL << len) - 1UL);
}
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __device__;
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
{
uint32_t pos = pos_32 & 0xFFU;
@ -120,7 +122,7 @@ extern "C"
return (base << (64U - pos - len)) >> (64U - len);
}
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __attribute__((device));
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __device__;
uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
{
uint32_t pos = pos_32 & 0xFFU;
@ -148,4 +150,20 @@ extern "C"
mask = ((1UL << len) - 1UL) << (pos);
return (~mask & base) | (mask & (insert << pos));
}
void FUNC(bar_sync)(uint32_t)
{
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
__builtin_amdgcn_s_barrier();
}
void FUNC(__assertfail)(uint64_t message,
uint64_t file,
uint32_t line,
uint64_t function,
uint64_t char_size)
{
(void)char_size;
__assert_fail((const char *)message, (const char *)file, line, (const char *)function);
}
}

View File

@ -96,10 +96,6 @@ impl Module {
let memory_buffer = unsafe { LLVMWriteBitcodeToMemoryBuffer(self.get()) };
MemoryBuffer(memory_buffer)
}
fn write_to_stderr(&self) {
unsafe { LLVMDumpModule(self.get()) };
}
}
impl Drop for Module {
@ -183,7 +179,6 @@ pub(super) fn run<'input>(
Directive2::Method(method) => emit_ctx.emit_method(method)?,
}
}
module.write_to_stderr();
if let Err(err) = module.verify() {
panic!("{:?}", err);
}
@ -246,6 +241,9 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
.map(|v| get_input_argument_type(self.context, &v.v_type, v.state_space)),
)?;
fn_ = unsafe { LLVMAddFunction(self.module, name.as_ptr(), fn_type) };
self.emit_fn_attribute(fn_, "amdgpu-unsafe-fp-atomics", "true");
self.emit_fn_attribute(fn_, "uniform-work-group-size", "true");
self.emit_fn_attribute(fn_, "no-trapping-math", "true");
}
if let ast::MethodName::Func(name) = func_decl.name {
self.resolver.register(name, fn_);
@ -404,6 +402,19 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
ptx_parser::ScalarType::BF16x2 => todo!(),
})
}
fn emit_fn_attribute(&self, llvm_object: LLVMValueRef, key: &str, value: &str) {
let attribute = unsafe {
LLVMCreateStringAttribute(
self.context,
key.as_ptr() as _,
key.len() as u32,
value.as_ptr() as _,
value.len() as u32,
)
};
unsafe { LLVMAddAttributeAtIndex(llvm_object, LLVMAttributeFunctionIndex, attribute) };
}
}
fn get_input_argument_type(
@ -529,7 +540,7 @@ impl<'a> MethodEmitContext<'a> {
ast::Instruction::Shl { data, arguments } => self.emit_shl(data, arguments),
ast::Instruction::Ret { data } => Ok(self.emit_ret(data)),
ast::Instruction::Cvta { data, arguments } => self.emit_cvta(data, arguments),
ast::Instruction::Abs { .. } => todo!(),
ast::Instruction::Abs { data, arguments } => self.emit_abs(data, arguments),
ast::Instruction::Mad { data, arguments } => self.emit_mad(data, arguments),
ast::Instruction::Fma { data, arguments } => self.emit_fma(data, arguments),
ast::Instruction::Sub { data, arguments } => self.emit_sub(data, arguments),
@ -539,7 +550,6 @@ impl<'a> MethodEmitContext<'a> {
ast::Instruction::Sqrt { data, arguments } => self.emit_sqrt(data, arguments),
ast::Instruction::Rsqrt { data, arguments } => self.emit_rsqrt(data, arguments),
ast::Instruction::Selp { data, arguments } => self.emit_selp(data, arguments),
ast::Instruction::Bar { .. } => todo!(),
ast::Instruction::Atom { data, arguments } => self.emit_atom(data, arguments),
ast::Instruction::AtomCas { data, arguments } => self.emit_atom_cas(data, arguments),
ast::Instruction::Div { data, arguments } => self.emit_div(data, arguments),
@ -559,6 +569,7 @@ impl<'a> MethodEmitContext<'a> {
ast::Instruction::Trap {} => todo!(),
// replaced by a function call
ast::Instruction::Bfe { .. }
| ast::Instruction::Bar { .. }
| ast::Instruction::Bfi { .. }
| ast::Instruction::Activemask { .. } => return Err(error_unreachable()),
}
@ -1570,8 +1581,12 @@ impl<'a> MethodEmitContext<'a> {
Some(LLVMBuildFPToUI),
)
}
ptx_parser::CvtMode::FPFromSigned(_) => todo!(),
ptx_parser::CvtMode::FPFromUnsigned(_) => todo!(),
ptx_parser::CvtMode::FPFromSigned(_) => {
return self.emit_cvt_int_to_float(data.to, arguments, LLVMBuildSIToFP)
}
ptx_parser::CvtMode::FPFromUnsigned(_) => {
return self.emit_cvt_int_to_float(data.to, arguments, LLVMBuildUIToFP)
}
};
let src = self.resolver.value(arguments.src)?;
self.resolver.with_result(arguments.dst, |dst| unsafe {
@ -1726,6 +1741,25 @@ impl<'a> MethodEmitContext<'a> {
Ok(())
}
fn emit_cvt_int_to_float(
&mut self,
to: ptx_parser::ScalarType,
arguments: ptx_parser::CvtArgs<SpirvWord>,
llvm_func: unsafe extern "C" fn(
arg1: LLVMBuilderRef,
Val: LLVMValueRef,
DestTy: LLVMTypeRef,
Name: *const i8,
) -> LLVMValueRef,
) -> Result<(), TranslateError> {
let type_ = get_scalar_type(self.context, to);
let src = self.resolver.value(arguments.src)?;
self.resolver.with_result(arguments.dst, |dst| unsafe {
llvm_func(self.builder, src, type_, dst)
});
Ok(())
}
fn emit_rsqrt(
&mut self,
data: ptx_parser::TypeFtz,
@ -1994,7 +2028,7 @@ impl<'a> MethodEmitContext<'a> {
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => {
return Err(error_todo())
}
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.maxnum",
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.minnum",
};
let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_()));
let llvm_type = get_scalar_type(self.context, data.type_());
@ -2021,7 +2055,7 @@ impl<'a> MethodEmitContext<'a> {
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { nan: true, .. }) => {
return Err(error_todo())
}
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.minnum",
ptx_parser::MinMaxDetails::Float(ptx_parser::MinMaxFloat { .. }) => "llvm.maxnum",
};
let intrinsic = format!("{}.{}\0", llvm_prefix, LLVMTypeDisplay(data.type_()));
let llvm_type = get_scalar_type(self.context, data.type_());
@ -2149,6 +2183,30 @@ impl<'a> MethodEmitContext<'a> {
Ok(())
}
fn emit_abs(
&mut self,
data: ast::TypeFtz,
arguments: ptx_parser::AbsArgs<SpirvWord>,
) -> Result<(), TranslateError> {
let llvm_type = get_scalar_type(self.context, data.type_);
let src = self.resolver.value(arguments.src)?;
let (prefix, intrinsic_arguments) = if data.type_.kind() == ast::ScalarKind::Float {
("llvm.fabs", vec![(src, llvm_type)])
} else {
let pred = get_scalar_type(self.context, ast::ScalarType::Pred);
let zero = unsafe { LLVMConstInt(pred, 0, 0) };
("llvm.abs", vec![(src, llvm_type), (zero, pred)])
};
let llvm_intrinsic = format!("{}.{}\0", prefix, LLVMTypeDisplay(data.type_));
self.emit_intrinsic(
unsafe { CStr::from_bytes_with_nul_unchecked(llvm_intrinsic.as_bytes()) },
Some(arguments.dst),
&data.type_.into(),
intrinsic_arguments,
)?;
Ok(())
}
/*
// Currently unused, LLVM 18 (ROCm 6.2) does not support `llvm.set.rounding`
// Should be available in LLVM 19

View File

@ -122,6 +122,13 @@ fn run_statement<'a, 'input>(
result.push(Statement::Instruction(instruction));
result.extend(visitor.post.drain(..).map(Statement::Instruction));
}
Statement::PtrAccess(ptr_access) => {
let statement = Statement::PtrAccess(visitor.visit_ptr_access(ptr_access)?);
let statement = statement.visit_map(visitor)?;
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
result.push(statement);
result.extend(visitor.post.drain(..).map(Statement::Instruction));
}
s => {
let new_statement = s.visit_map(visitor)?;
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
@ -259,6 +266,41 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
Ok(ast::Instruction::Ld { data, arguments })
}
fn visit_ptr_access(
&mut self,
ptr_access: PtrAccess<SpirvWord>,
) -> Result<PtrAccess<SpirvWord>, TranslateError> {
let (old_space, new_space, name) = match self.variables.get(&ptr_access.ptr_src) {
Some(RemapAction::LDStSpaceChange {
old_space,
new_space,
name,
}) => (*old_space, *new_space, *name),
Some(RemapAction::PreLdPostSt { .. }) | None => return Ok(ptr_access),
};
if ptr_access.state_space != old_space {
return Err(error_mismatched_type());
}
// Propagate space changes in dst
let new_dst = self
.resolver
.register_unnamed(Some((ptr_access.underlying_type.clone(), new_space)));
self.variables.insert(
ptr_access.dst,
RemapAction::LDStSpaceChange {
old_space,
new_space,
name: new_dst,
},
);
Ok(PtrAccess {
ptr_src: name,
dst: new_dst,
state_space: new_space,
..ptr_access
})
}
fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> {
let old_space = match var.state_space {
space @ (ptx_parser::StateSpace::Reg | ptx_parser::StateSpace::Param) => space,

View File

@ -22,6 +22,7 @@ mod normalize_identifiers2;
mod normalize_predicates2;
mod replace_instructions_with_function_calls;
mod resolve_function_pointers;
mod replace_known_functions;
static ZLUDA_PTX_IMPL: &'static [u8] = include_bytes!("../../lib/zluda_ptx_impl.bc");
const ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl_";
@ -42,9 +43,10 @@ pub fn to_llvm_module<'input>(ast: ast::Module<'input>) -> Result<Module, Transl
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
let sreg_map = SpecialRegistersMap2::new(&mut scoped_resolver)?;
let directives = normalize_identifiers2::run(&mut scoped_resolver, ast.directives)?;
let directives = replace_known_functions::run(&flat_resolver, directives);
let directives = normalize_predicates2::run(&mut flat_resolver, directives)?;
let directives = resolve_function_pointers::run(directives)?;
let directives = fix_special_registers2::run(&mut flat_resolver, &sreg_map, directives)?;
let directives: Vec<Directive2<'_, ptx_parser::Instruction<ptx_parser::ParsedOperand<SpirvWord>>, ptx_parser::ParsedOperand<SpirvWord>>> = fix_special_registers2::run(&mut flat_resolver, &sreg_map, directives)?;
let directives = expand_operands::run(&mut flat_resolver, directives)?;
let directives = deparamize_functions::run(&mut flat_resolver, directives)?;
let directives = insert_explicit_load_store::run(&mut flat_resolver, directives)?;

View File

@ -104,6 +104,9 @@ fn run_instruction<'input>(
let name = ["bfi_", scalar_to_ptx_name(data)].concat();
to_call(resolver, fn_declarations, name.into(), i)?
}
i @ ptx_parser::Instruction::Bar { .. } => {
to_call(resolver, fn_declarations, "bar_sync".into(), i)?
}
i => i,
})
}

View File

@ -0,0 +1,38 @@
use super::{GlobalStringIdentResolver2, NormalizedDirective2, SpirvWord};
pub(crate) fn run<'input>(
resolver: &GlobalStringIdentResolver2<'input>,
mut directives: Vec<NormalizedDirective2<'input>>,
) -> Vec<NormalizedDirective2<'input>> {
for directive in directives.iter_mut() {
match directive {
NormalizedDirective2::Method(func) => {
func.import_as =
replace_with_ptx_impl(resolver, &func.func_decl.name, func.import_as.take());
}
_ => {}
}
}
directives
}
fn replace_with_ptx_impl<'input>(
resolver: &GlobalStringIdentResolver2<'input>,
fn_name: &ptx_parser::MethodName<'input, SpirvWord>,
name: Option<String>,
) -> Option<String> {
let known_names = ["__assertfail"];
match name {
Some(name) if known_names.contains(&&*name) => Some(format!("__zluda_ptx_impl_{}", name)),
Some(name) => Some(name),
None => match fn_name {
ptx_parser::MethodName::Func(name) => match resolver.ident_map.get(name) {
Some(super::IdentEntry {
name: Some(name), ..
}) => Some(format!("__zluda_ptx_impl_{}", name)),
_ => None,
},
ptx_parser::MethodName::Kernel(..) => None,
},
}
}

View File

@ -298,7 +298,7 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
let mut result = vec![0u8.into(); output.len()];
{
let dev = 0;
let mut stream = ptr::null_mut();
let mut stream = unsafe { mem::zeroed() };
unsafe { hipStreamCreate(&mut stream) }.unwrap();
let mut dev_props = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut dev_props, dev) }.unwrap();
@ -308,9 +308,9 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
module.linked_bitcode(),
)
.unwrap();
let mut module = ptr::null_mut();
let mut module = unsafe { mem::zeroed() };
unsafe { hipModuleLoadData(&mut module, elf_module.as_ptr() as _) }.unwrap();
let mut kernel = ptr::null_mut();
let mut kernel = unsafe { mem::zeroed() };
unsafe { hipModuleGetFunction(&mut kernel, module, name.as_ptr()) }.unwrap();
let mut inp_b = ptr::null_mut();
unsafe { hipMalloc(&mut inp_b, input.len() * mem::size_of::<Input>()) }.unwrap();

View File

@ -1,5 +1,5 @@
use super::{driver, FromCuda, ZludaObject};
use cuda_types::*;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use rustc_hash::FxHashSet;
use std::{cell::RefCell, ptr, sync::Mutex};

View File

@ -1,4 +1,4 @@
use cuda_types::*;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{mem, ptr};
@ -70,6 +70,16 @@ pub(crate) fn get_attribute(
attrib: CUdevice_attribute,
dev_idx: hipDevice_t,
) -> hipError_t {
fn get_device_prop(
pi: &mut i32,
dev_idx: hipDevice_t,
f: impl FnOnce(&hipDeviceProp_tR0600) -> i32,
) -> hipError_t {
let mut props = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut props, dev_idx)? };
*pi = f(&props);
Ok(())
}
match attrib {
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => {
*pi = 32;
@ -79,6 +89,110 @@ pub(crate) fn get_attribute(
*pi = 0;
return Ok(());
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLayered[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLayered[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLayered[2])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DLayered[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DLayered[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER => {
return get_device_prop(pi, dev_idx, |props| {
(props.maxTexture2DGather[0] > 0 && props.maxTexture2DGather[1] > 0) as i32
})
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DGather[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DGather[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture3DAlt[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture3DAlt[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture3DAlt[2])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTextureCubemap)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTextureCubemapLayered[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS => {
return get_device_prop(pi, dev_idx, |props| props.maxTextureCubemapLayered[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface1D)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface2D[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface2D[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface3D[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface3D[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface3D[2])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface1DLayered[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface1DLayered[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface2DLayered[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface2DLayered[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
return get_device_prop(pi, dev_idx, |props| props.maxSurface2DLayered[2])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurfaceCubemap)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxSurfaceCubemapLayered[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS => {
return get_device_prop(pi, dev_idx, |props| props.maxSurfaceCubemapLayered[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DLinear)
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLinear[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLinear[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DLinear[2])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DMipmap[0])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture2DMipmap[1])
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
*pi = COMPUTE_CAPABILITY_MAJOR;
return Ok(());
@ -87,6 +201,9 @@ pub(crate) fn get_attribute(
*pi = COMPUTE_CAPABILITY_MINOR;
return Ok(());
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
return get_device_prop(pi, dev_idx, |props| props.maxTexture1DMipmap)
}
_ => {}
}
let attrib = remap_attribute! {
@ -260,7 +377,7 @@ pub(crate) fn get_name(
name: *mut ::core::ffi::c_char,
len: ::core::ffi::c_int,
dev: hipDevice_t,
) -> cuda_types::CUresult {
) -> CUresult {
unsafe { hipDeviceGetName(name, len, dev) }?;
let len = len as usize;
let buffer = unsafe { std::slice::from_raw_parts(name, len) };
@ -287,7 +404,7 @@ pub(crate) fn total_mem_v2(bytes: *mut usize, dev: hipDevice_t) -> hipError_t {
unsafe { hipDeviceTotalMem(bytes, dev) }
}
pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) -> hipError_t {
pub(crate) fn get_properties(prop: &mut CUdevprop, dev: hipDevice_t) -> hipError_t {
let mut hip_props = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut hip_props, dev) }?;
prop.maxThreadsPerBlock = hip_props.maxThreadsPerBlock;

View File

@ -1,4 +1,4 @@
use cuda_types::*;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{
ffi::{CStr, CString},
@ -74,6 +74,6 @@ pub(crate) fn init(flags: ::core::ffi::c_uint) -> CUresult {
}
pub(crate) fn get_version(version: &mut ::core::ffi::c_int) -> CUresult {
*version = cuda_types::CUDA_VERSION as i32;
*version = cuda_types::cuda::CUDA_VERSION as i32;
Ok(())
}

View File

@ -1,4 +1,5 @@
use hip_runtime_sys::*;
use std::mem;
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
@ -33,3 +34,11 @@ pub(crate) fn get_address_range_v2(
) -> hipError_t {
unsafe { hipMemGetAddressRange(pbase, psize, dptr) }
}
pub(crate) fn set_d32_v2(dst: hipDeviceptr_t, ui: ::core::ffi::c_uint, n: usize) -> hipError_t {
unsafe { hipMemsetD32(dst, mem::transmute(ui), n) }
}
pub(crate) fn set_d8_v2(dst: hipDeviceptr_t, value: ::core::ffi::c_uchar, n: usize) -> hipError_t {
unsafe { hipMemsetD8(dst, value, n) }
}

View File

@ -1,4 +1,4 @@
use cuda_types::*;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::mem::{self, ManuallyDrop, MaybeUninit};
@ -107,10 +107,11 @@ from_cuda_nop!(
*const ::core::ffi::c_char,
*mut ::core::ffi::c_void,
*mut *mut ::core::ffi::c_void,
u8,
i32,
u32,
usize,
cuda_types::CUdevprop,
cuda_types::cuda::CUdevprop,
CUdevice_attribute
);
from_cuda_transmute!(
@ -136,7 +137,7 @@ impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
pub(crate) trait ZludaObject: Sized + Send + Sync {
const COOKIE: usize;
const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE;
const LIVENESS_FAIL: CUerror = cuda_types::cuda::CUerror::INVALID_VALUE;
type CudaHandle: Sized;

View File

@ -1,5 +1,5 @@
use super::ZludaObject;
use cuda_types::*;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{ffi::CStr, mem};

View File

@ -1,4 +1,4 @@
use cuda_types::*;
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{ffi::c_void, ptr};

View File

@ -1,7 +1,7 @@
pub(crate) mod r#impl;
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
@ -14,7 +14,7 @@ macro_rules! unimplemented {
}
macro_rules! implemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
@ -28,7 +28,7 @@ macro_rules! implemented {
}
macro_rules! implemented_in_function {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
@ -72,6 +72,8 @@ cuda_base::cuda_function_declarations!(
cuModuleUnload,
cuPointerGetAttribute,
cuMemGetAddressRange_v2,
cuMemsetD32_v2,
cuMemsetD8_v2
],
implemented_in_function <= [
cuLaunchKernel,

View File

@ -5,7 +5,7 @@ use std::{collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::
use syn::{
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path,
PathArguments, Signature, Type, TypePath, UseTree,
PathArguments, Signature, Type, TypePath, UseTree, PathSegment
};
fn main() {
@ -14,6 +14,11 @@ fn main() {
&crate_root,
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
);
generate_ml(&crate_root);
generate_cuda(&crate_root);
}
fn generate_cuda(crate_root: &PathBuf) {
let cuda_header = bindgen::Builder::default()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
@ -42,16 +47,91 @@ fn main() {
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cuda_header).unwrap();
generate_functions(&crate_root, &["..", "cuda_base", "src", "cuda.rs"], &module);
generate_types(&crate_root, &["..", "cuda_types", "src", "lib.rs"], &module);
generate_functions(
&crate_root,
"cuda",
&["..", "cuda_base", "src", "cuda.rs"],
&module,
);
generate_types_cuda(
&crate_root,
&["..", "cuda_types", "src", "cuda.rs"],
&module,
);
generate_display(
&crate_root,
&["..", "zluda_dump", "src", "format_generated.rs"],
"cuda_types",
&["cuda_types", "cuda"],
&module,
)
}
fn generate_ml(crate_root: &PathBuf) {
let ml_header = bindgen::Builder::default()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
.header("/usr/local/cuda/include/nvml.h")
.allowlist_type("^nvml.*")
.allowlist_function("^nvml.*")
.allowlist_var("^NVML.*")
.must_use_type("nvmlReturn_t")
.constified_enum("nvmlReturn_enum")
.generate()
.unwrap()
.to_string();
let mut module: syn::File = syn::parse_str(&ml_header).unwrap();
let mut converter = ConvertIntoRustResult {
type_: "nvmlReturn_t",
underlying_type: "nvmlReturn_enum",
new_error_type: "nvmlError_t",
error_prefix: ("NVML_ERROR_", "ERROR_"),
success: ("NVML_SUCCESS", "SUCCESS"),
constants: Vec::new(),
};
module.items = module
.items
.into_iter()
.filter_map(|item| match item {
Item::Const(const_) => converter.get_const(const_).map(Item::Const),
Item::Use(use_) => converter.get_use(use_).map(Item::Use),
Item::Type(type_) => converter.get_type(type_).map(Item::Type),
item => Some(item),
})
.collect::<Vec<_>>();
converter.flush(&mut module.items);
generate_functions(
&crate_root,
"nvml",
&["..", "cuda_base", "src", "nvml.rs"],
&module,
);
generate_types(
&crate_root,
&["..", "cuda_types", "src", "nvml.rs"],
&module,
);
}
fn generate_types(crate_root: &PathBuf, path: &[&str], module: &syn::File) {
let non_fn = module.items.iter().filter_map(|item| match item {
Item::ForeignMod(_) => None,
_ => Some(item),
});
let module: syn::File = parse_quote! {
#(#non_fn)*
};
let mut output = crate_root.clone();
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
}
fn generate_hip_runtime(output: &PathBuf, path: &[&str]) {
let hiprt_header = bindgen::Builder::default()
.use_core()
@ -125,7 +205,7 @@ fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
}
}
fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
fn generate_functions(output: &PathBuf, submodule: &str, path: &[&str], module: &syn::File) {
let fns_ = module.items.iter().filter_map(|item| match item {
Item::ForeignMod(extern_) => match &*extern_.items {
[ForeignItem::Fn(fn_)] => Some(fn_),
@ -138,7 +218,8 @@ fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
#(#fns_)*
}
};
syn::visit_mut::visit_file_mut(&mut PrependCudaPath, &mut module);
let submodule = Ident::new(submodule, Span::call_site());
syn::visit_mut::visit_file_mut(&mut PrependCudaPath { module: submodule }, &mut module);
syn::visit_mut::visit_file_mut(&mut RemoveVisibility, &mut module);
syn::visit_mut::visit_file_mut(&mut ExplicitReturnType, &mut module);
let mut output = output.clone();
@ -146,7 +227,7 @@ fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
write_rust_to_file(output, &prettyplease::unparse(&module))
}
fn generate_types(output: &PathBuf, path: &[&str], module: &syn::File) {
fn generate_types_cuda(output: &PathBuf, path: &[&str], module: &syn::File) {
let mut module = module.clone();
let mut converter = ConvertIntoRustResult {
type_: "CUresult",
@ -314,7 +395,9 @@ impl VisitMut for FixAbi {
}
}
struct PrependCudaPath;
struct PrependCudaPath {
module: Ident,
}
impl VisitMut for PrependCudaPath {
fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
@ -322,7 +405,8 @@ impl VisitMut for PrependCudaPath {
match &*type_.path.segments[0].ident.to_string() {
"usize" | "f64" | "f32" => {}
_ => {
*type_ = parse_quote! { cuda_types :: #type_ };
let module = &self.module;
*type_ = parse_quote! { cuda_types :: #module :: #type_ };
}
}
}
@ -350,7 +434,7 @@ impl VisitMut for ExplicitReturnType {
fn generate_display(
output: &PathBuf,
path: &[&str],
types_crate: &'static str,
types_crate: &[&'static str],
module: &syn::File,
) {
let ignore_types = [
@ -419,7 +503,7 @@ fn generate_display(
}
struct DeriveDisplayState<'a> {
types_crate: &'static str,
types_crate: Path,
ignore_types: FxHashSet<Ident>,
ignore_fns: FxHashSet<Ident>,
enums: FxHashMap<&'a Ident, Vec<&'a Ident>>,
@ -430,12 +514,22 @@ struct DeriveDisplayState<'a> {
impl<'a> DeriveDisplayState<'a> {
fn new(
ignore_types: &[&'static str],
types_crate: &'static str,
types_crate: &[&'static str],
ignore_fns: &[&'static str],
count_selectors: &[(&'static str, usize, usize)],
) -> Self {
let segments = types_crate
.iter()
.map(|seg| PathSegment {
ident: Ident::new(seg, Span::call_site()),
arguments: PathArguments::None,
})
.collect::<Punctuated<_, _>>();
DeriveDisplayState {
types_crate,
types_crate: Path {
leading_colon: None,
segments,
},
ignore_types: ignore_types
.into_iter()
.map(|x| Ident::new(x, Span::call_site()))
@ -469,8 +563,11 @@ fn cuda_derive_display_trait_for_item<'a>(
state: &mut DeriveDisplayState<'a>,
item: &'a Item,
) -> Option<syn::Item> {
let path_prefix = Path::from(Ident::new(state.types_crate, Span::call_site()));
let path_prefix = & state.types_crate;
let path_prefix_iter = iter::repeat(&path_prefix);
let mut prepend_path = PrependCudaPath {
module: Ident::new("cuda", Span::call_site()),
};
match item {
Item::Const(const_) => {
if const_.ty.to_token_stream().to_string() == "cudaError_enum" {
@ -490,7 +587,7 @@ fn cuda_derive_display_trait_for_item<'a>(
.iter()
.map(|fn_arg| {
let mut fn_arg = fn_arg.clone();
syn::visit_mut::visit_fn_arg_mut(&mut PrependCudaPath, &mut fn_arg);
syn::visit_mut::visit_fn_arg_mut(&mut prepend_path, &mut fn_arg);
fn_arg
})
.collect::<Vec<_>>();
@ -686,7 +783,7 @@ fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
})
});
parse_quote! {
impl crate::format::CudaDisplay for cuda_types::CUresult {
impl crate::format::CudaDisplay for cuda_types::cuda::CUresult {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
match self {
Ok(()) => writer.write_all(b"CUDA_SUCCESS"),

View File

@ -1,7 +1,7 @@
use crate::format;
use crate::{log, os, trace::StateTracker};
use crate::{log::UInt, GlobalDelayedState};
use cuda_types::{CUmodule, CUresult, CUuuid};
use cuda_types::cuda::*;
use std::borrow::Cow;
use std::hash::Hash;
use std::{

View File

@ -1,4 +1,4 @@
use cuda_types::{CUGLDeviceList, CUdevice};
use cuda_types::cuda::*;
use std::{
ffi::{c_void, CStr},
fmt::LowerHex,
@ -14,7 +14,7 @@ pub(crate) trait CudaDisplay {
) -> std::io::Result<()>;
}
impl CudaDisplay for cuda_types::CUuuid {
impl CudaDisplay for CUuuid {
fn write(
&self,
_fn_name: &'static str,
@ -26,7 +26,7 @@ impl CudaDisplay for cuda_types::CUuuid {
}
}
impl CudaDisplay for cuda_types::CUdeviceptr_v1 {
impl CudaDisplay for CUdeviceptr_v1 {
fn write(
&self,
_fn_name: &'static str,
@ -125,7 +125,7 @@ pub fn write_handle<T: LowerHex>(
Ok(())
}
impl CudaDisplay for cuda_types::CUipcMemHandle {
impl CudaDisplay for CUipcMemHandle {
fn write(
&self,
_fn_name: &'static str,
@ -136,7 +136,7 @@ impl CudaDisplay for cuda_types::CUipcMemHandle {
}
}
impl CudaDisplay for cuda_types::CUipcEventHandle {
impl CudaDisplay for CUipcEventHandle {
fn write(
&self,
_fn_name: &'static str,
@ -147,7 +147,7 @@ impl CudaDisplay for cuda_types::CUipcEventHandle {
}
}
impl CudaDisplay for cuda_types::CUmemPoolPtrExportData_v1 {
impl CudaDisplay for CUmemPoolPtrExportData_v1 {
fn write(
&self,
_fn_name: &'static str,
@ -223,7 +223,7 @@ impl CudaDisplay for *mut i8 {
}
}
impl CudaDisplay for cuda_types::CUstreamBatchMemOpParams {
impl CudaDisplay for CUstreamBatchMemOpParams {
fn write(
&self,
_fn_name: &'static str,
@ -236,15 +236,15 @@ impl CudaDisplay for cuda_types::CUstreamBatchMemOpParams {
// distinct operations with nominally distinct union variants, but
// in reality they are structurally different, so we take a little
// shortcut here
cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_32
| cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_32 => {
CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_32
| CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_32 => {
write_wait_value(&self.waitValue, writer, false)
}
cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_64
| cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_64 => {
CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WAIT_VALUE_64
| CUstreamBatchMemOpType::CU_STREAM_MEM_OP_WRITE_VALUE_64 => {
write_wait_value(&self.waitValue, writer, true)
}
cuda_types::CUstreamBatchMemOpType::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES => {
CUstreamBatchMemOpType::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES => {
CudaDisplay::write(&self.flushRemoteWrites, "", 0, writer)
}
_ => {
@ -258,7 +258,7 @@ impl CudaDisplay for cuda_types::CUstreamBatchMemOpParams {
}
pub fn write_wait_value(
this: &cuda_types::CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
this: &CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
writer: &mut (impl std::io::Write + ?Sized),
is_64_bit: bool,
) -> std::io::Result<()> {
@ -275,7 +275,7 @@ pub fn write_wait_value(
}
pub fn write_wait_value_32_or_64(
this: &cuda_types::CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1,
this: &CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st__bindgen_ty_1,
writer: &mut (impl std::io::Write + ?Sized),
is_64_bit: bool,
) -> std::io::Result<()> {
@ -288,7 +288,7 @@ pub fn write_wait_value_32_or_64(
}
}
impl CudaDisplay for cuda_types::CUDA_RESOURCE_DESC_st {
impl CudaDisplay for CUDA_RESOURCE_DESC_st {
fn write(
&self,
_fn_name: &'static str,
@ -298,28 +298,28 @@ impl CudaDisplay for cuda_types::CUDA_RESOURCE_DESC_st {
writer.write_all(b"{ resType: ")?;
CudaDisplay::write(&self.resType, "", 0, writer)?;
match self.resType {
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_ARRAY => {
CUresourcetype::CU_RESOURCE_TYPE_ARRAY => {
writer.write_all(b", res: ")?;
CudaDisplay::write(unsafe { &self.res.array }, "", 0, writer)?;
writer.write_all(b", flags: ")?;
CudaDisplay::write(&self.flags, "", 0, writer)?;
writer.write_all(b" }")
}
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY => {
CUresourcetype::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY => {
writer.write_all(b", res: ")?;
CudaDisplay::write(unsafe { &self.res.mipmap }, "", 0, writer)?;
writer.write_all(b", flags: ")?;
CudaDisplay::write(&self.flags, "", 0, writer)?;
writer.write_all(b" }")
}
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_LINEAR => {
CUresourcetype::CU_RESOURCE_TYPE_LINEAR => {
writer.write_all(b", res: ")?;
CudaDisplay::write(unsafe { &self.res.linear }, "", 0, writer)?;
writer.write_all(b", flags: ")?;
CudaDisplay::write(&self.flags, "", 0, writer)?;
writer.write_all(b" }")
}
cuda_types::CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => {
CUresourcetype::CU_RESOURCE_TYPE_PITCH2D => {
writer.write_all(b", res: ")?;
CudaDisplay::write(unsafe { &self.res.pitch2D }, "", 0, writer)?;
writer.write_all(b", flags: ")?;
@ -335,7 +335,7 @@ impl CudaDisplay for cuda_types::CUDA_RESOURCE_DESC_st {
}
}
impl CudaDisplay for cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
impl CudaDisplay for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
fn write(
&self,
_fn_name: &'static str,
@ -345,22 +345,22 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
writer.write_all(b"{ type: ")?;
CudaDisplay::write(&self.type_, "", 0, writer)?;
match self.type_ {
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD => {
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD => {
writer.write_all(b", handle: ")?;
CudaDisplay::write(unsafe { &self.handle.fd }, "", 0,writer)?;
}
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
| cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
| cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
|cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE => {
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
| CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
| CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
|CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE => {
write_win32_handle(unsafe { self.handle.win32 }, writer)?;
}
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
| cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT => {
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
| CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT => {
writer.write_all(b", handle: ")?;
CudaDisplay::write(unsafe { &self.handle.win32.handle }, "", 0,writer)?;
}
cuda_types::CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF => {
CUexternalMemoryHandleType::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF => {
writer.write_all(b", handle: ")?;
CudaDisplay::write(unsafe { &self.handle.nvSciBufObject }, "", 0,writer)?;
}
@ -381,7 +381,7 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
}
pub fn write_win32_handle(
win32: cuda_types::CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
win32: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
if win32.handle != ptr::null_mut() {
@ -400,7 +400,7 @@ pub fn write_win32_handle(
Ok(())
}
impl CudaDisplay for cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
impl CudaDisplay for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
fn write(
&self,
_fn_name: &'static str,
@ -410,22 +410,22 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
writer.write_all(b"{ type: ")?;
CudaDisplay::write(&self.type_, "", 0, writer)?;
match self.type_ {
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD => {
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD => {
writer.write_all(b", handle: ")?;
CudaDisplay::write(unsafe { &self.handle.fd }, "", 0,writer)?;
}
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
| cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT => {
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
| CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT => {
write_win32_handle(unsafe { mem::transmute(self.handle.win32) }, writer)?;
}
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT => {
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT => {
writer.write_all(b", handle: ")?;
CudaDisplay::write(unsafe { &self.handle.win32.handle }, "", 0,writer)?;
}
cuda_types::CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC => {
CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC => {
writer.write_all(b", handle: ")?;
CudaDisplay::write(unsafe { &self.handle.nvSciSyncObj }, "", 0,writer)?;
}
@ -442,7 +442,7 @@ impl CudaDisplay for cuda_types::CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
}
impl CudaDisplay
for cuda_types::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2
for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2
{
fn write(
&self,
@ -457,7 +457,7 @@ impl CudaDisplay
}
impl CudaDisplay
for cuda_types::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2
for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2
{
fn write(
&self,
@ -471,7 +471,7 @@ impl CudaDisplay
}
}
impl CudaDisplay for cuda_types::CUgraphNodeParams_st {
impl CudaDisplay for CUgraphNodeParams_st {
fn write(
&self,
_fn_name: &'static str,
@ -482,7 +482,7 @@ impl CudaDisplay for cuda_types::CUgraphNodeParams_st {
}
}
impl CudaDisplay for cuda_types::CUlaunchConfig_st {
impl CudaDisplay for CUlaunchConfig_st {
fn write(
&self,
_fn_name: &'static str,
@ -493,7 +493,7 @@ impl CudaDisplay for cuda_types::CUlaunchConfig_st {
}
}
impl CudaDisplay for cuda_types::CUeglFrame_st {
impl CudaDisplay for CUeglFrame_st {
fn write(
&self,
_fn_name: &'static str,
@ -504,7 +504,7 @@ impl CudaDisplay for cuda_types::CUeglFrame_st {
}
}
impl CudaDisplay for cuda_types::CUdevResource_st {
impl CudaDisplay for CUdevResource_st {
fn write(
&self,
_fn_name: &'static str,
@ -514,7 +514,7 @@ impl CudaDisplay for cuda_types::CUdevResource_st {
todo!()
}
}
impl CudaDisplay for cuda_types::CUlaunchAttribute_st {
impl CudaDisplay for CUlaunchAttribute_st {
fn write(
&self,
_fn_name: &'static str,
@ -574,7 +574,7 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
}
}
impl CudaDisplay for cuda_types::CUarrayMapInfo_st {
impl CudaDisplay for CUarrayMapInfo_st {
fn write(
&self,
_fn_name: &'static str,
@ -585,7 +585,7 @@ impl CudaDisplay for cuda_types::CUarrayMapInfo_st {
}
}
impl CudaDisplay for cuda_types::CUexecAffinityParam_st {
impl CudaDisplay for CUexecAffinityParam_st {
fn write(
&self,
_fn_name: &'static str,
@ -599,9 +599,9 @@ impl CudaDisplay for cuda_types::CUexecAffinityParam_st {
#[allow(non_snake_case)]
pub fn write_cuGraphKernelNodeGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
hNode: cuda_types::CUgraphNode,
attr: cuda_types::CUkernelNodeAttrID,
value_out: *mut cuda_types::CUkernelNodeAttrValue,
hNode: CUgraphNode,
attr: CUkernelNodeAttrID,
value_out: *mut CUkernelNodeAttrValue,
) -> std::io::Result<()> {
writer.write_all(b"(hNode: ")?;
CudaDisplay::write(&hNode, "cuGraphKernelNodeGetAttribute", 0, writer)?;
@ -614,9 +614,9 @@ pub fn write_cuGraphKernelNodeGetAttribute(
#[allow(non_snake_case)]
pub fn write_cuGraphKernelNodeSetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
hNode: cuda_types::CUgraphNode,
attr: cuda_types::CUkernelNodeAttrID,
value_out: *const cuda_types::CUkernelNodeAttrValue,
hNode: CUgraphNode,
attr: CUkernelNodeAttrID,
value_out: *const CUkernelNodeAttrValue,
) -> std::io::Result<()> {
write_cuGraphKernelNodeGetAttribute(writer, hNode, attr, value_out as *mut _)
}
@ -624,9 +624,9 @@ pub fn write_cuGraphKernelNodeSetAttribute(
#[allow(non_snake_case)]
pub fn write_cuStreamGetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
hStream: cuda_types::CUstream,
attr: cuda_types::CUstreamAttrID,
value_out: *mut cuda_types::CUstreamAttrValue,
hStream: CUstream,
attr: CUstreamAttrID,
value_out: *mut CUstreamAttrValue,
) -> std::io::Result<()> {
writer.write_all(b"(hStream: ")?;
CudaDisplay::write(&hStream, "cuStreamGetAttribute", 0, writer)?;
@ -640,11 +640,11 @@ fn write_launch_attribute(
writer: &mut (impl std::io::Write + ?Sized),
fn_name: &'static str,
index: usize,
attribute: cuda_types::CUlaunchAttributeID,
value_out: *mut cuda_types::CUstreamAttrValue,
attribute: CUlaunchAttributeID,
value_out: *mut CUstreamAttrValue,
) -> std::io::Result<()> {
match attribute {
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).accessPolicyWindow },
@ -653,47 +653,47 @@ fn write_launch_attribute(
writer,
)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).cooperative }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).syncPolicy }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).clusterDim }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).clusterSchedulingPolicyPreference }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).programmaticStreamSerializationAllowed }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).programmaticEvent }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).priority }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).memSyncDomainMap }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).memSyncDomain }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).launchCompletionEvent }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).deviceUpdatableKernelNode }, fn_name, index, writer)
}
@ -704,9 +704,9 @@ fn write_launch_attribute(
#[allow(non_snake_case)]
pub fn write_cuStreamGetAttribute_ptsz(
writer: &mut (impl std::io::Write + ?Sized),
hStream: cuda_types::CUstream,
attr: cuda_types::CUstreamAttrID,
value_out: *mut cuda_types::CUstreamAttrValue,
hStream: CUstream,
attr: CUstreamAttrID,
value_out: *mut CUstreamAttrValue,
) -> std::io::Result<()> {
write_cuStreamGetAttribute(writer, hStream, attr, value_out)
}
@ -714,9 +714,9 @@ pub fn write_cuStreamGetAttribute_ptsz(
#[allow(non_snake_case)]
pub fn write_cuStreamSetAttribute(
writer: &mut (impl std::io::Write + ?Sized),
hStream: cuda_types::CUstream,
attr: cuda_types::CUstreamAttrID,
value_out: *const cuda_types::CUstreamAttrValue,
hStream: CUstream,
attr: CUstreamAttrID,
value_out: *const CUstreamAttrValue,
) -> std::io::Result<()> {
write_cuStreamGetAttribute(writer, hStream, attr, value_out as *mut _)
}
@ -724,9 +724,9 @@ pub fn write_cuStreamSetAttribute(
#[allow(non_snake_case)]
pub fn write_cuStreamSetAttribute_ptsz(
writer: &mut (impl std::io::Write + ?Sized),
hStream: cuda_types::CUstream,
attr: cuda_types::CUstreamAttrID,
value_out: *const cuda_types::CUstreamAttrValue,
hStream: CUstream,
attr: CUstreamAttrID,
value_out: *const CUstreamAttrValue,
) -> std::io::Result<()> {
write_cuStreamSetAttribute(writer, hStream, attr, value_out)
}

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
use cuda_types::*;
use cuda_types::cuda::*;
use paste::paste;
use side_by_side::CudaDynamicFns;
use std::io;
@ -9,7 +9,7 @@ extern crate lazy_static;
extern crate cuda_types;
macro_rules! extern_redirect {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]
@ -30,7 +30,7 @@ macro_rules! extern_redirect {
}
macro_rules! extern_redirect_with_post {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]

View File

@ -1,8 +1,5 @@
use crate::format;
use cuda_types::CUmodule;
use cuda_types::CUuuid;
use super::CUresult;
use cuda_types::cuda::*;
use super::Settings;
use std::error::Error;
use std::ffi::c_void;

View File

@ -1,4 +1,4 @@
use cuda_types::CUuuid;
use cuda_types::cuda::CUuuid;
use std::ffi::{c_void, CStr, CString};
use std::mem;

View File

@ -56,7 +56,7 @@ impl CudaDynamicFns {
}
macro_rules! emit_cuda_fn_table {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
#[derive(Default)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]

View File

@ -1,5 +1,5 @@
use crate::{dark_api, log, Settings};
use cuda_types::CUmodule;
use cuda_types::cuda::*;
use std::{
collections::HashMap,
ffi::{c_void, CStr, CString},

View File

@ -9,7 +9,7 @@ name = "zluda_with"
path = "src/main.rs"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["jobapi2", "processthreadsapi", "synchapi", "winbase", "std"] }
winapi = { version = "0.3", features = ["jobapi2", "processthreadsapi", "synchapi", "winbase", "std", "processenv"] }
tempfile = "3"
argh = "0.1"
detours-sys = { path = "../detours-sys" }

View File

@ -7,6 +7,9 @@ use std::{
};
fn main() -> Result<(), VarError> {
if std::env::var_os("CARGO_CFG_WINDOWS").is_none() {
return Ok(());
}
println!("cargo:rerun-if-changed=build.rs");
if env::var("PROFILE")? != "debug" {
return Ok(());

View File

@ -1,6 +1,6 @@
#![crate_type = "bin"]
#[link(name = "do_cuinit")]
#[link(name = "do_cuinit", kind = "raw-dylib")]
extern "system" {
fn do_cuinit(flags: u32) -> u32;
}

View File

@ -7,3 +7,7 @@ edition = "2021"
[lib]
name = "nvml"
crate-type = ["cdylib"]
[dependencies]
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }

View File

@ -1,3 +0,0 @@
bindgen "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include\nvml.h" --whitelist-function="^nvml.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug -o src/nvml.rs
sed -i -e 's/extern "C" {//g' -e 's/-> nvmlReturn_t;/-> nvmlReturn_t { crate::r#impl::unimplemented()/g' -e 's/pub fn /#[no_mangle] pub extern "C" fn /g' src/nvml.rs
rustfmt src/nvml.rs

View File

@ -1,4 +1,5 @@
use crate::nvml::nvmlReturn_t;
use cuda_types::nvml::*;
use std::{ffi::CStr, ptr};
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> nvmlReturn_t {
@ -9,3 +10,35 @@ pub(crate) fn unimplemented() -> nvmlReturn_t {
pub(crate) fn unimplemented() -> nvmlReturn_t {
nvmlReturn_t::NVML_ERROR_NOT_SUPPORTED
}
#[allow(non_snake_case)]
pub(crate) fn nvmlErrorString(
_result: cuda_types::nvml::nvmlReturn_t,
) -> *const ::core::ffi::c_char {
c"".as_ptr()
}
#[allow(non_snake_case)]
pub(crate) fn nvmlInit_v2() -> cuda_types::nvml::nvmlReturn_t {
nvmlReturn_t::SUCCESS
}
const VERSION: &'static CStr = c"550.77";
#[allow(non_snake_case)]
pub(crate) fn nvmlSystemGetDriverVersion(
result: *mut ::core::ffi::c_char,
length: ::core::ffi::c_uint,
) -> cuda_types::nvml::nvmlReturn_t {
if result == ptr::null_mut() {
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
}
let version = VERSION.to_bytes_with_nul();
let copy_length = usize::min(length as usize, version.len());
let slice = unsafe { std::slice::from_raw_parts_mut(result.cast(), copy_length) };
slice.copy_from_slice(&version[..copy_length]);
if let Some(null) = slice.last_mut() {
*null = 0;
}
nvmlReturn_t::SUCCESS
}

View File

@ -1,3 +1,34 @@
pub mod r#impl;
#[allow(warnings)]
mod nvml;
mod r#impl;
macro_rules! unimplemented_fn {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
r#impl::unimplemented()
}
)*
};
}
macro_rules! implemented_fn {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
r#impl::$fn_name($($arg_id),*)
}
)*
};
}
cuda_base::nvml_function_declarations!(
unimplemented_fn,
implemented_fn <= [
nvmlErrorString,
nvmlInit_v2,
nvmlSystemGetDriverVersion
]
);

File diff suppressed because it is too large Load Diff