cargo fmt non-generated files

This commit is contained in:
Violet
2025-07-30 18:49:32 +00:00
parent 427d583007
commit bb6bd7a50b
32 changed files with 11413 additions and 11352 deletions

View File

@ -1,407 +1,408 @@
use amd_comgr_sys::*;
use std::{ffi::CStr, mem, ptr};
macro_rules! call_dispatch_arg {
(2, $arg:ident) => {
$arg.comgr2()
};
(2, $arg:tt) => {
#[allow(unused_braces)]
$arg
};
(3, $arg:ident) => {
$arg.comgr3()
};
(3, $arg:tt) => {
#[allow(unused_braces)]
$arg
};
}
macro_rules! call_dispatch {
($src:expr => $fn_:ident( $($arg:tt),+ )) => {
match $src {
Comgr::V2(this) => unsafe { this. $fn_(
$(
call_dispatch_arg!(2, $arg),
)+
) }?,
Comgr::V3(this) => unsafe { this. $fn_(
$(
call_dispatch_arg!(3, $arg),
)+
) }?,
}
};
}
macro_rules! comgr_owned {
($name:ident, $comgr_type:ident, $ctor:ident, $dtor:ident) => {
struct $name<'a> {
handle: u64,
comgr: &'a Comgr,
}
impl<'a> $name<'a> {
fn new(comgr: &'a Comgr) -> Result<Self, Error> {
let handle = match comgr {
Comgr::V2(comgr) => {
let mut result = unsafe { mem::zeroed() };
unsafe { comgr.$ctor(&mut result)? };
result.handle
}
Comgr::V3(comgr) => {
let mut result = unsafe { mem::zeroed() };
unsafe { comgr.$ctor(&mut result)? };
result.handle
}
};
Ok(Self { handle, comgr })
}
fn comgr2(&self) -> amd_comgr_sys::comgr2::$comgr_type {
amd_comgr_sys::comgr2::$comgr_type {
handle: self.handle,
}
}
fn comgr3(&self) -> amd_comgr_sys::comgr3::$comgr_type {
amd_comgr_sys::comgr3::$comgr_type {
handle: self.handle,
}
}
}
impl<'a> Drop for $name<'a> {
fn drop(&mut self) {
match self.comgr {
Comgr::V2(comgr) => {
unsafe {
comgr.$dtor(amd_comgr_sys::comgr2::$comgr_type {
handle: self.handle,
})
}
.ok();
}
Comgr::V3(comgr) => {
unsafe {
comgr.$dtor(amd_comgr_sys::comgr3::$comgr_type {
handle: self.handle,
})
}
.ok();
}
}
}
}
};
}
comgr_owned!(
ActionInfo,
amd_comgr_action_info_t,
amd_comgr_create_action_info,
amd_comgr_destroy_action_info
);
impl<'a> ActionInfo<'a> {
fn set_isa_name(&self, isa: &CStr) -> Result<(), Error> {
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
full_isa.extend(isa.to_bytes_with_nul());
call_dispatch!(self.comgr => amd_comgr_action_info_set_isa_name(self, { full_isa.as_ptr().cast() }));
Ok(())
}
fn set_language(&self, language: Language) -> Result<(), Error> {
call_dispatch!(self.comgr => amd_comgr_action_info_set_language(self, language));
Ok(())
}
fn set_options<'b>(&self, options: impl Iterator<Item = &'b CStr>) -> Result<(), Error> {
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
call_dispatch!(self.comgr => amd_comgr_action_info_set_option_list(self, { options.as_ptr().cast_mut() }, { options.len() }));
Ok(())
}
}
comgr_owned!(
DataSet,
amd_comgr_data_set_t,
amd_comgr_create_data_set,
amd_comgr_destroy_data_set
);
impl<'a> DataSet<'a> {
fn add(&self, data: &Data) -> Result<(), Error> {
call_dispatch!(self.comgr => amd_comgr_data_set_add(self, data));
Ok(())
}
fn get_data(&self, kind: DataKind, index: usize) -> Result<Data, Error> {
let mut handle = 0u64;
call_dispatch!(self.comgr => amd_comgr_action_data_get_data(self, kind, { index }, { std::ptr::from_mut(&mut handle).cast() }));
Ok(Data(handle))
}
}
struct Data(u64);
impl Data {
fn new(comgr: &Comgr, kind: DataKind, name: &CStr, content: &[u8]) -> Result<Self, Error> {
let mut handle = 0u64;
call_dispatch!(comgr => amd_comgr_create_data(kind, { std::ptr::from_mut(&mut handle).cast() }));
let data = Data(handle);
call_dispatch!(comgr => amd_comgr_set_data_name(data, { name.as_ptr() }));
call_dispatch!(comgr => amd_comgr_set_data(data, { content.len() }, { content.as_ptr().cast() }));
Ok(data)
}
fn comgr2(&self) -> comgr2::amd_comgr_data_t {
comgr2::amd_comgr_data_s { handle: self.0 }
}
fn comgr3(&self) -> comgr3::amd_comgr_data_t {
comgr3::amd_comgr_data_s { handle: self.0 }
}
fn copy_content(&self, comgr: &Comgr) -> Result<Vec<u8>, Error> {
let mut size = unsafe { mem::zeroed() };
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { ptr::null_mut() }));
let mut result: Vec<u8> = Vec::with_capacity(size);
unsafe { result.set_len(size) };
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { result.as_mut_ptr().cast() }));
Ok(result)
}
}
pub fn compile_bitcode(
comgr: &Comgr,
gcn_arch: &CStr,
main_buffer: &[u8],
attributes_buffer: &[u8],
ptx_impl: &[u8],
) -> Result<Vec<u8>, Error> {
let bitcode_data_set = DataSet::new(comgr)?;
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
bitcode_data_set.add(&main_bitcode_data)?;
let attributes_bitcode_data = Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
bitcode_data_set.add(&attributes_bitcode_data)?;
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
bitcode_data_set.add(&stdlib_bitcode_data)?;
let linking_info = ActionInfo::new(comgr)?;
let linked_data_set =
comgr.do_action(ActionKind::LinkBcToBc, &linking_info, &bitcode_data_set)?;
let compile_to_exec = ActionInfo::new(comgr)?;
compile_to_exec.set_isa_name(gcn_arch)?;
compile_to_exec.set_language(Language::LlvmIr)?;
let common_options = [
// This makes no sense, but it makes ockl linking work
c"-Xclang",
c"-mno-link-builtin-bitcode-postopt",
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
// and then fails to inline them
c"-Xclang",
c"-fdenormal-fp-math=dynamic",
c"-O3",
c"-mno-wavefrontsize64",
c"-mcumode",
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
// c"-fsave-optimization-record=yaml",
]
.into_iter();
let opt_options = if cfg!(debug_assertions) {
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
[c"-g", c"", c"", c"", c""]
} else {
[
c"-g0",
// default inlining threshold times 10
c"-mllvm",
c"-inline-threshold=2250",
c"-mllvm",
c"-inlinehint-threshold=3250",
]
};
compile_to_exec.set_options(common_options.chain(opt_options))?;
let exec_data_set = comgr.do_action(
ActionKind::CompileSourceToExecutable,
&compile_to_exec,
&linked_data_set,
)?;
let executable = exec_data_set.get_data(DataKind::Executable, 0)?;
executable.copy_content(comgr)
}
pub enum Comgr {
V2(amd_comgr_sys::comgr2::Comgr2),
V3(amd_comgr_sys::comgr3::Comgr3),
}
impl Comgr {
pub fn new() -> Result<Self, Error> {
unsafe { libloading::Library::new(os::COMGR3) }
.and_then(|lib| {
Ok(Comgr::V3(unsafe {
amd_comgr_sys::comgr3::Comgr3::from_library(lib)?
}))
})
.or_else(|_| {
unsafe { libloading::Library::new(os::COMGR2) }.and_then(|lib| {
Ok(if Self::is_broken_v2(&lib) {
Comgr::V3(unsafe { amd_comgr_sys::comgr3::Comgr3::from_library(lib)? })
} else {
Comgr::V2(unsafe { amd_comgr_sys::comgr2::Comgr2::from_library(lib)? })
})
})
})
.map_err(Into::into)
}
// For reasons unknown, on AMD Adrenalin 25.5.1, AMD ships amd_comgr_2.dll that shows up as
// version 2.9.0, but actually uses the 3.X ABI. This is our best effort to detect it.
// Version 25.3.1 returns 2.8.0, which seem to be the last version that actually uses the 2 ABI
fn is_broken_v2(lib: &libloading::Library) -> bool {
if cfg!(not(windows)) {
return false;
}
let amd_comgr_get_version = match unsafe {
lib.get::<unsafe extern "C" fn(major: *mut usize, minor: *mut usize)>(
b"amd_comgr_get_version\0",
)
} {
Ok(symbol) => symbol,
Err(_) => return false,
};
let mut major = 0;
let mut minor = 0;
unsafe { (amd_comgr_get_version)(&mut major, &mut minor) };
(major, minor) >= (2, 9)
}
fn do_action(
&self,
kind: ActionKind,
action: &ActionInfo,
data_set: &DataSet,
) -> Result<DataSet, Error> {
let result = DataSet::new(self)?;
call_dispatch!(self => amd_comgr_do_action(kind, action, data_set, result));
Ok(result)
}
}
#[derive(Debug)]
pub struct Error(pub ::std::num::NonZeroU32);
impl Error {
#[doc = " A generic error has occurred."]
pub const UNKNOWN: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
pub const INVALID_ARGUMENT: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
#[doc = " Failed to allocate the necessary resources."]
pub const OUT_OF_RESOURCES: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
}
impl From<libloading::Error> for Error {
fn from(_: libloading::Error) -> Self {
Self::UNKNOWN
}
}
impl From<comgr2::amd_comgr_status_s> for Error {
fn from(status: comgr2::amd_comgr_status_s) -> Self {
Error(status.0)
}
}
impl From<comgr3::amd_comgr_status_s> for Error {
fn from(status: comgr3::amd_comgr_status_s) -> Self {
Error(status.0)
}
}
macro_rules! impl_into {
($self_type:ident, $to_type:ident, [$($from:ident => $to:ident),+]) => {
#[derive(Copy, Clone)]
#[allow(unused)]
enum $self_type {
$(
$from,
)+
}
impl $self_type {
fn comgr2(self) -> comgr2::$to_type {
match self {
$(
Self:: $from => comgr2 :: $to_type :: $to,
)+
}
}
fn comgr3(self) -> comgr3::$to_type {
match self {
$(
Self:: $from => comgr3 :: $to_type :: $to,
)+
}
}
}
};
}
impl_into!(
ActionKind,
amd_comgr_action_kind_t,
[
LinkBcToBc => AMD_COMGR_ACTION_LINK_BC_TO_BC,
CompileSourceToExecutable => AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE
]
);
impl_into!(
DataKind,
amd_comgr_data_kind_t,
[
Undef => AMD_COMGR_DATA_KIND_UNDEF,
Source => AMD_COMGR_DATA_KIND_SOURCE,
Include => AMD_COMGR_DATA_KIND_INCLUDE,
PrecompiledHeader => AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER,
Diagnostic => AMD_COMGR_DATA_KIND_DIAGNOSTIC,
Log => AMD_COMGR_DATA_KIND_LOG,
Bc => AMD_COMGR_DATA_KIND_BC,
Relocatable => AMD_COMGR_DATA_KIND_RELOCATABLE,
Executable => AMD_COMGR_DATA_KIND_EXECUTABLE,
Bytes => AMD_COMGR_DATA_KIND_BYTES,
Fatbin => AMD_COMGR_DATA_KIND_FATBIN,
Ar => AMD_COMGR_DATA_KIND_AR,
BcBundle => AMD_COMGR_DATA_KIND_BC_BUNDLE,
ArBundle => AMD_COMGR_DATA_KIND_AR_BUNDLE,
ObjBundle => AMD_COMGR_DATA_KIND_OBJ_BUNDLE
]
);
impl_into!(
Language,
amd_comgr_language_t,
[
None => AMD_COMGR_LANGUAGE_NONE,
OpenCl12 => AMD_COMGR_LANGUAGE_OPENCL_1_2,
OpenCl20 => AMD_COMGR_LANGUAGE_OPENCL_2_0,
Hip => AMD_COMGR_LANGUAGE_HIP,
LlvmIr => AMD_COMGR_LANGUAGE_LLVM_IR
]
);
#[cfg(unix)]
mod os {
pub static COMGR3: &'static str = "libamd_comgr.so.3";
pub static COMGR2: &'static str = "libamd_comgr.so.2";
}
#[cfg(windows)]
mod os {
pub static COMGR3: &'static str = "amd_comgr_3.dll";
pub static COMGR2: &'static str = "amd_comgr_2.dll";
}
use amd_comgr_sys::*;
use std::{ffi::CStr, mem, ptr};
macro_rules! call_dispatch_arg {
(2, $arg:ident) => {
$arg.comgr2()
};
(2, $arg:tt) => {
#[allow(unused_braces)]
$arg
};
(3, $arg:ident) => {
$arg.comgr3()
};
(3, $arg:tt) => {
#[allow(unused_braces)]
$arg
};
}
macro_rules! call_dispatch {
($src:expr => $fn_:ident( $($arg:tt),+ )) => {
match $src {
Comgr::V2(this) => unsafe { this. $fn_(
$(
call_dispatch_arg!(2, $arg),
)+
) }?,
Comgr::V3(this) => unsafe { this. $fn_(
$(
call_dispatch_arg!(3, $arg),
)+
) }?,
}
};
}
macro_rules! comgr_owned {
($name:ident, $comgr_type:ident, $ctor:ident, $dtor:ident) => {
struct $name<'a> {
handle: u64,
comgr: &'a Comgr,
}
impl<'a> $name<'a> {
fn new(comgr: &'a Comgr) -> Result<Self, Error> {
let handle = match comgr {
Comgr::V2(comgr) => {
let mut result = unsafe { mem::zeroed() };
unsafe { comgr.$ctor(&mut result)? };
result.handle
}
Comgr::V3(comgr) => {
let mut result = unsafe { mem::zeroed() };
unsafe { comgr.$ctor(&mut result)? };
result.handle
}
};
Ok(Self { handle, comgr })
}
fn comgr2(&self) -> amd_comgr_sys::comgr2::$comgr_type {
amd_comgr_sys::comgr2::$comgr_type {
handle: self.handle,
}
}
fn comgr3(&self) -> amd_comgr_sys::comgr3::$comgr_type {
amd_comgr_sys::comgr3::$comgr_type {
handle: self.handle,
}
}
}
impl<'a> Drop for $name<'a> {
fn drop(&mut self) {
match self.comgr {
Comgr::V2(comgr) => {
unsafe {
comgr.$dtor(amd_comgr_sys::comgr2::$comgr_type {
handle: self.handle,
})
}
.ok();
}
Comgr::V3(comgr) => {
unsafe {
comgr.$dtor(amd_comgr_sys::comgr3::$comgr_type {
handle: self.handle,
})
}
.ok();
}
}
}
}
};
}
comgr_owned!(
ActionInfo,
amd_comgr_action_info_t,
amd_comgr_create_action_info,
amd_comgr_destroy_action_info
);
impl<'a> ActionInfo<'a> {
fn set_isa_name(&self, isa: &CStr) -> Result<(), Error> {
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
full_isa.extend(isa.to_bytes_with_nul());
call_dispatch!(self.comgr => amd_comgr_action_info_set_isa_name(self, { full_isa.as_ptr().cast() }));
Ok(())
}
fn set_language(&self, language: Language) -> Result<(), Error> {
call_dispatch!(self.comgr => amd_comgr_action_info_set_language(self, language));
Ok(())
}
fn set_options<'b>(&self, options: impl Iterator<Item = &'b CStr>) -> Result<(), Error> {
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
call_dispatch!(self.comgr => amd_comgr_action_info_set_option_list(self, { options.as_ptr().cast_mut() }, { options.len() }));
Ok(())
}
}
comgr_owned!(
DataSet,
amd_comgr_data_set_t,
amd_comgr_create_data_set,
amd_comgr_destroy_data_set
);
impl<'a> DataSet<'a> {
fn add(&self, data: &Data) -> Result<(), Error> {
call_dispatch!(self.comgr => amd_comgr_data_set_add(self, data));
Ok(())
}
fn get_data(&self, kind: DataKind, index: usize) -> Result<Data, Error> {
let mut handle = 0u64;
call_dispatch!(self.comgr => amd_comgr_action_data_get_data(self, kind, { index }, { std::ptr::from_mut(&mut handle).cast() }));
Ok(Data(handle))
}
}
struct Data(u64);
impl Data {
fn new(comgr: &Comgr, kind: DataKind, name: &CStr, content: &[u8]) -> Result<Self, Error> {
let mut handle = 0u64;
call_dispatch!(comgr => amd_comgr_create_data(kind, { std::ptr::from_mut(&mut handle).cast() }));
let data = Data(handle);
call_dispatch!(comgr => amd_comgr_set_data_name(data, { name.as_ptr() }));
call_dispatch!(comgr => amd_comgr_set_data(data, { content.len() }, { content.as_ptr().cast() }));
Ok(data)
}
fn comgr2(&self) -> comgr2::amd_comgr_data_t {
comgr2::amd_comgr_data_s { handle: self.0 }
}
fn comgr3(&self) -> comgr3::amd_comgr_data_t {
comgr3::amd_comgr_data_s { handle: self.0 }
}
fn copy_content(&self, comgr: &Comgr) -> Result<Vec<u8>, Error> {
let mut size = unsafe { mem::zeroed() };
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { ptr::null_mut() }));
let mut result: Vec<u8> = Vec::with_capacity(size);
unsafe { result.set_len(size) };
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { result.as_mut_ptr().cast() }));
Ok(result)
}
}
pub fn compile_bitcode(
comgr: &Comgr,
gcn_arch: &CStr,
main_buffer: &[u8],
attributes_buffer: &[u8],
ptx_impl: &[u8],
) -> Result<Vec<u8>, Error> {
let bitcode_data_set = DataSet::new(comgr)?;
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
bitcode_data_set.add(&main_bitcode_data)?;
let attributes_bitcode_data =
Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
bitcode_data_set.add(&attributes_bitcode_data)?;
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
bitcode_data_set.add(&stdlib_bitcode_data)?;
let linking_info = ActionInfo::new(comgr)?;
let linked_data_set =
comgr.do_action(ActionKind::LinkBcToBc, &linking_info, &bitcode_data_set)?;
let compile_to_exec = ActionInfo::new(comgr)?;
compile_to_exec.set_isa_name(gcn_arch)?;
compile_to_exec.set_language(Language::LlvmIr)?;
let common_options = [
// This makes no sense, but it makes ockl linking work
c"-Xclang",
c"-mno-link-builtin-bitcode-postopt",
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
// and then fails to inline them
c"-Xclang",
c"-fdenormal-fp-math=dynamic",
c"-O3",
c"-mno-wavefrontsize64",
c"-mcumode",
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
// c"-fsave-optimization-record=yaml",
]
.into_iter();
let opt_options = if cfg!(debug_assertions) {
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
[c"-g", c"", c"", c"", c""]
} else {
[
c"-g0",
// default inlining threshold times 10
c"-mllvm",
c"-inline-threshold=2250",
c"-mllvm",
c"-inlinehint-threshold=3250",
]
};
compile_to_exec.set_options(common_options.chain(opt_options))?;
let exec_data_set = comgr.do_action(
ActionKind::CompileSourceToExecutable,
&compile_to_exec,
&linked_data_set,
)?;
let executable = exec_data_set.get_data(DataKind::Executable, 0)?;
executable.copy_content(comgr)
}
pub enum Comgr {
V2(amd_comgr_sys::comgr2::Comgr2),
V3(amd_comgr_sys::comgr3::Comgr3),
}
impl Comgr {
pub fn new() -> Result<Self, Error> {
unsafe { libloading::Library::new(os::COMGR3) }
.and_then(|lib| {
Ok(Comgr::V3(unsafe {
amd_comgr_sys::comgr3::Comgr3::from_library(lib)?
}))
})
.or_else(|_| {
unsafe { libloading::Library::new(os::COMGR2) }.and_then(|lib| {
Ok(if Self::is_broken_v2(&lib) {
Comgr::V3(unsafe { amd_comgr_sys::comgr3::Comgr3::from_library(lib)? })
} else {
Comgr::V2(unsafe { amd_comgr_sys::comgr2::Comgr2::from_library(lib)? })
})
})
})
.map_err(Into::into)
}
// For reasons unknown, on AMD Adrenalin 25.5.1, AMD ships amd_comgr_2.dll that shows up as
// version 2.9.0, but actually uses the 3.X ABI. This is our best effort to detect it.
// Version 25.3.1 returns 2.8.0, which seem to be the last version that actually uses the 2 ABI
fn is_broken_v2(lib: &libloading::Library) -> bool {
if cfg!(not(windows)) {
return false;
}
let amd_comgr_get_version = match unsafe {
lib.get::<unsafe extern "C" fn(major: *mut usize, minor: *mut usize)>(
b"amd_comgr_get_version\0",
)
} {
Ok(symbol) => symbol,
Err(_) => return false,
};
let mut major = 0;
let mut minor = 0;
unsafe { (amd_comgr_get_version)(&mut major, &mut minor) };
(major, minor) >= (2, 9)
}
fn do_action(
&self,
kind: ActionKind,
action: &ActionInfo,
data_set: &DataSet,
) -> Result<DataSet, Error> {
let result = DataSet::new(self)?;
call_dispatch!(self => amd_comgr_do_action(kind, action, data_set, result));
Ok(result)
}
}
#[derive(Debug)]
pub struct Error(pub ::std::num::NonZeroU32);
impl Error {
#[doc = " A generic error has occurred."]
pub const UNKNOWN: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
pub const INVALID_ARGUMENT: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
#[doc = " Failed to allocate the necessary resources."]
pub const OUT_OF_RESOURCES: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
}
impl From<libloading::Error> for Error {
fn from(_: libloading::Error) -> Self {
Self::UNKNOWN
}
}
impl From<comgr2::amd_comgr_status_s> for Error {
fn from(status: comgr2::amd_comgr_status_s) -> Self {
Error(status.0)
}
}
impl From<comgr3::amd_comgr_status_s> for Error {
fn from(status: comgr3::amd_comgr_status_s) -> Self {
Error(status.0)
}
}
macro_rules! impl_into {
($self_type:ident, $to_type:ident, [$($from:ident => $to:ident),+]) => {
#[derive(Copy, Clone)]
#[allow(unused)]
enum $self_type {
$(
$from,
)+
}
impl $self_type {
fn comgr2(self) -> comgr2::$to_type {
match self {
$(
Self:: $from => comgr2 :: $to_type :: $to,
)+
}
}
fn comgr3(self) -> comgr3::$to_type {
match self {
$(
Self:: $from => comgr3 :: $to_type :: $to,
)+
}
}
}
};
}
impl_into!(
ActionKind,
amd_comgr_action_kind_t,
[
LinkBcToBc => AMD_COMGR_ACTION_LINK_BC_TO_BC,
CompileSourceToExecutable => AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE
]
);
impl_into!(
DataKind,
amd_comgr_data_kind_t,
[
Undef => AMD_COMGR_DATA_KIND_UNDEF,
Source => AMD_COMGR_DATA_KIND_SOURCE,
Include => AMD_COMGR_DATA_KIND_INCLUDE,
PrecompiledHeader => AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER,
Diagnostic => AMD_COMGR_DATA_KIND_DIAGNOSTIC,
Log => AMD_COMGR_DATA_KIND_LOG,
Bc => AMD_COMGR_DATA_KIND_BC,
Relocatable => AMD_COMGR_DATA_KIND_RELOCATABLE,
Executable => AMD_COMGR_DATA_KIND_EXECUTABLE,
Bytes => AMD_COMGR_DATA_KIND_BYTES,
Fatbin => AMD_COMGR_DATA_KIND_FATBIN,
Ar => AMD_COMGR_DATA_KIND_AR,
BcBundle => AMD_COMGR_DATA_KIND_BC_BUNDLE,
ArBundle => AMD_COMGR_DATA_KIND_AR_BUNDLE,
ObjBundle => AMD_COMGR_DATA_KIND_OBJ_BUNDLE
]
);
impl_into!(
Language,
amd_comgr_language_t,
[
None => AMD_COMGR_LANGUAGE_NONE,
OpenCl12 => AMD_COMGR_LANGUAGE_OPENCL_1_2,
OpenCl20 => AMD_COMGR_LANGUAGE_OPENCL_2_0,
Hip => AMD_COMGR_LANGUAGE_HIP,
LlvmIr => AMD_COMGR_LANGUAGE_LLVM_IR
]
);
#[cfg(unix)]
mod os {
pub static COMGR3: &'static str = "libamd_comgr.so.3";
pub static COMGR2: &'static str = "libamd_comgr.so.2";
}
#[cfg(windows)]
mod os {
pub static COMGR3: &'static str = "amd_comgr_3.dll";
pub static COMGR2: &'static str = "amd_comgr_2.dll";
}

View File

@ -77,21 +77,22 @@ impl<'a> Fatbin<'a> {
pub fn get_submodules(&self) -> Result<FatbinIter<'a>, FatbinError> {
match self.wrapper.version {
FatbincWrapper::VERSION_V2 =>
Ok(FatbinIter::V2(FatbinSubmoduleIterator {
fatbins: self.wrapper.filename_or_fatbins as *const *const std::ffi::c_void,
_phantom: std::marker::PhantomData,
})),
FatbincWrapper::VERSION_V2 => Ok(FatbinIter::V2(FatbinSubmoduleIterator {
fatbins: self.wrapper.filename_or_fatbins as *const *const std::ffi::c_void,
_phantom: std::marker::PhantomData,
})),
FatbincWrapper::VERSION_V1 => {
let header = parse_fatbin_header(&self.wrapper.data)
.map_err(FatbinError::ParseFailure)?;
let header =
parse_fatbin_header(&self.wrapper.data).map_err(FatbinError::ParseFailure)?;
Ok(FatbinIter::V1(Some(FatbinSubmodule::new(header))))
}
version => Err(FatbinError::ParseFailure(ParseError::UnexpectedBinaryField{
field_name: "FATBINC_VERSION",
observed: version,
expected: [FatbincWrapper::VERSION_V1, FatbincWrapper::VERSION_V2].into(),
})),
version => Err(FatbinError::ParseFailure(
ParseError::UnexpectedBinaryField {
field_name: "FATBINC_VERSION",
observed: version,
expected: [FatbincWrapper::VERSION_V1, FatbincWrapper::VERSION_V2].into(),
},
)),
}
}
}
@ -176,7 +177,6 @@ impl<'a> FatbinFile<'a> {
unsafe { self.get_payload().to_vec() }
};
while payload.last() == Some(&0) {
// remove trailing zeros
payload.pop();

View File

@ -259,12 +259,12 @@ dark_api! {
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
[0] = context_local_storage_put(
context: cuda_types::cuda::CUcontext,
key: *mut std::ffi::c_void,
value: *mut std::ffi::c_void,
key: *mut std::ffi::c_void,
value: *mut std::ffi::c_void,
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
dtor_cb: Option<extern "system" fn(
cuda_types::cuda::CUcontext,
*mut std::ffi::c_void,
*mut std::ffi::c_void,
*mut std::ffi::c_void,
)>
) -> cuda_types::cuda::CUresult,

View File

@ -1,4 +1,4 @@
#[allow(warnings)]
pub mod comgr2;
#[allow(warnings)]
pub mod comgr3;
pub mod comgr3;

View File

@ -37,4 +37,4 @@ impl CudaDisplay for FatbinHeader {
CudaDisplay::write(&self.files_size, "", 0, writer)?;
writer.write_all(b" }")
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -4,4 +4,3 @@ mod test;
pub use pass::to_llvm_module;
pub use pass::Attributes;

View File

@ -1,10 +1,13 @@
use std::ffi::CStr;
use super::*;
use super::super::*;
use llvm_zluda::{core::*};
use super::*;
use llvm_zluda::core::*;
pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Module, TranslateError> {
pub(crate) fn run(
context: &Context,
attributes: Attributes,
) -> Result<llvm::Module, TranslateError> {
let module = llvm::Module::new(context, LLVM_UNNAMED);
emit_attribute(context, &module, "clock_rate", attributes.clock_rate)?;
@ -16,7 +19,12 @@ pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Mod
Ok(module)
}
fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribute: u32) -> Result<(), TranslateError> {
fn emit_attribute(
context: &Context,
module: &llvm::Module,
name: &str,
attribute: u32,
) -> Result<(), TranslateError> {
let name = format!("{}attribute_{}\0", ZLUDA_PTX_PREFIX, name).to_ascii_uppercase();
let name = unsafe { CStr::from_bytes_with_nul_unchecked(name.as_bytes()) };
let attribute_type = get_scalar_type(context.get(), ast::ScalarType::U32);
@ -31,4 +39,4 @@ fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribut
unsafe { LLVMSetInitializer(global, LLVMConstInt(attribute_type, attribute as u64, 0)) };
unsafe { LLVMSetGlobalConstant(global, 1) };
Ok(())
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
pub(super) mod emit;
pub(super) mod attributes;
pub(super) mod emit;
use std::ffi::CStr;
use std::ops::Deref;
@ -44,9 +44,7 @@ pub struct Module(LLVMModuleRef);
impl Module {
fn new(ctx: &Context, name: &CStr) -> Self {
Self(
unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) },
)
Self(unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) })
}
fn get(&self) -> LLVMModuleRef {

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,9 @@ pub(crate) fn run(
for directive in directives.iter_mut() {
let (body_ref, is_kernel) = match directive {
Directive2::Method(Function2 {
body: Some(body), is_kernel, ..
body: Some(body),
is_kernel,
..
}) => (body, *is_kernel),
_ => continue,
};

View File

@ -9,7 +9,9 @@ fn parse_and_assert(ptx_text: &str) {
fn compile_and_assert(ptx_text: &str) -> Result<(), TranslateError> {
let ast = ast::parse_module_checked(ptx_text).unwrap();
let attributes = pass::Attributes { clock_rate: 2124000 };
let attributes = pass::Attributes {
clock_rate: 2124000,
};
crate::to_llvm_module(ast, attributes)?;
Ok(())
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3581,7 +3581,7 @@ derive_parser!(
state.errors.push(PtxError::SyntaxError);
CpAsyncCpSize::Bytes4
});
let src_size = src_size
.and_then(|op| op.as_immediate())
.and_then(|imm| imm.as_u64());

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
use super::{context, driver};
use cuda_types::cuda::*;
use hip_runtime_sys::*;
use std::{mem, ptr};
use super::{driver, context};
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
@ -462,22 +462,21 @@ fn clamp_usize(x: usize) -> i32 {
usize::min(x, i32::MAX as usize) as i32
}
pub(crate) fn get_primary_context(hip_dev: hipDevice_t) -> Result<(&'static context::Context, CUcontext), CUerror> {
pub(crate) fn get_primary_context(
hip_dev: hipDevice_t,
) -> Result<(&'static context::Context, CUcontext), CUerror> {
let dev: &'static driver::Device = driver::device(hip_dev)?;
Ok(dev.primary_context())
}
pub(crate) fn primary_context_retain(
pctx: &mut CUcontext,
hip_dev: hipDevice_t,
) -> CUresult {
pub(crate) fn primary_context_retain(pctx: &mut CUcontext, hip_dev: hipDevice_t) -> CUresult {
let (ctx, cu_ctx) = get_primary_context(hip_dev)?;
ctx.with_state_mut(|state: &mut context::ContextState| {
state.ref_count += 1;
Ok(())
})?;
*pctx = cu_ctx;
Ok(())
}
@ -497,8 +496,6 @@ pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> CUresult {
pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
let (ctx, _) = get_primary_context(hip_dev)?;
ctx.with_state_mut(|state| {
state.reset()
})?;
ctx.with_state_mut(|state| state.reset())?;
Ok(())
}
}

View File

@ -38,10 +38,7 @@ pub(crate) unsafe fn unload(library: CUlibrary) -> CUresult {
super::drop_checked::<Library>(library)
}
pub(crate) unsafe fn get_module(
out: &mut CUmodule,
library: &Library,
) -> CUresult {
*out = module::Module{base: library.base}.wrap();
pub(crate) unsafe fn get_module(out: &mut CUmodule, library: &Library) -> CUresult {
*out = module::Module { base: library.base }.wrap();
Ok(())
}

View File

@ -68,7 +68,9 @@ pub(crate) fn load_hip_module(image: *const std::ffi::c_void) -> Result<hipModul
unsafe { hipCtxGetDevice(&mut dev) }?;
let mut props = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
let attributes = ptx::Attributes { clock_rate: props.clockRate as u32 };
let attributes = ptx::Attributes {
clock_rate: props.clockRate as u32,
};
let llvm_module = ptx::to_llvm_module(ast, attributes).map_err(|_| CUerror::UNKNOWN)?;
let elf_module = comgr::compile_bitcode(
&global_state.comgr,
@ -91,7 +93,6 @@ pub(crate) fn load_data(module: &mut CUmodule, image: &std::ffi::c_void) -> CUre
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
super::drop_checked::<Module>(hmod)
}
pub(crate) fn get_function(

View File

@ -1,11 +1,10 @@
use cuda_types::cuda::CUerror;
use std::sync::atomic::{AtomicBool, Ordering};
pub(crate) mod r#impl;
#[cfg_attr(windows, path = "os_win.rs")]
#[cfg_attr(not(windows), path = "os_unix.rs")]
mod os;
pub(crate) mod r#impl;
static INITIALIZED: AtomicBool = AtomicBool::new(true);
pub(crate) fn initialized() -> bool {
@ -66,61 +65,60 @@ macro_rules! implemented_in_function {
cuda_macros::cuda_function_declarations!(
unimplemented,
implemented <= [
cuCtxCreate_v2,
cuCtxDestroy_v2,
cuCtxGetLimit,
cuCtxSetCurrent,
cuCtxGetCurrent,
cuCtxGetDevice,
cuCtxSetLimit,
cuCtxSynchronize,
cuCtxPushCurrent,
cuCtxPushCurrent_v2,
cuCtxPopCurrent,
cuCtxPopCurrent_v2,
cuDeviceComputeCapability,
cuDeviceGet,
cuDeviceGetAttribute,
cuDeviceGetCount,
cuDeviceGetLuid,
cuDeviceGetName,
cuDeviceGetProperties,
cuDeviceGetUuid,
cuDeviceGetUuid_v2,
cuDevicePrimaryCtxRelease,
cuDevicePrimaryCtxRetain,
cuDevicePrimaryCtxReset,
cuDeviceTotalMem_v2,
cuDriverGetVersion,
cuFuncGetAttribute,
cuGetExportTable,
cuGetProcAddress,
cuGetProcAddress_v2,
cuInit,
cuLibraryLoadData,
cuLibraryGetModule,
cuLibraryUnload,
cuMemAlloc_v2,
cuMemFree_v2,
cuMemHostAlloc,
cuMemFreeHost,
cuMemGetAddressRange_v2,
cuMemGetInfo_v2,
cuMemcpyDtoH_v2,
cuMemcpyHtoD_v2,
cuMemsetD32_v2,
cuMemsetD8_v2,
cuModuleGetFunction,
cuModuleGetLoadingMode,
cuModuleLoadData,
cuModuleUnload,
cuPointerGetAttribute,
cuStreamSynchronize,
cuProfilerStart,
cuProfilerStop,
],
implemented_in_function <= [
cuLaunchKernel,
]
implemented
<= [
cuCtxCreate_v2,
cuCtxDestroy_v2,
cuCtxGetLimit,
cuCtxSetCurrent,
cuCtxGetCurrent,
cuCtxGetDevice,
cuCtxSetLimit,
cuCtxSynchronize,
cuCtxPushCurrent,
cuCtxPushCurrent_v2,
cuCtxPopCurrent,
cuCtxPopCurrent_v2,
cuDeviceComputeCapability,
cuDeviceGet,
cuDeviceGetAttribute,
cuDeviceGetCount,
cuDeviceGetLuid,
cuDeviceGetName,
cuDeviceGetProperties,
cuDeviceGetUuid,
cuDeviceGetUuid_v2,
cuDevicePrimaryCtxRelease,
cuDevicePrimaryCtxRetain,
cuDevicePrimaryCtxReset,
cuDeviceTotalMem_v2,
cuDriverGetVersion,
cuFuncGetAttribute,
cuGetExportTable,
cuGetProcAddress,
cuGetProcAddress_v2,
cuInit,
cuLibraryLoadData,
cuLibraryGetModule,
cuLibraryUnload,
cuMemAlloc_v2,
cuMemFree_v2,
cuMemHostAlloc,
cuMemFreeHost,
cuMemGetAddressRange_v2,
cuMemGetInfo_v2,
cuMemcpyDtoH_v2,
cuMemcpyHtoD_v2,
cuMemsetD32_v2,
cuMemsetD8_v2,
cuModuleGetFunction,
cuModuleGetLoadingMode,
cuModuleLoadData,
cuModuleUnload,
cuPointerGetAttribute,
cuStreamSynchronize,
cuProfilerStart,
cuProfilerStop,
],
implemented_in_function <= [cuLaunchKernel,]
);

View File

@ -0,0 +1 @@

View File

@ -11,12 +11,16 @@ pub(crate) fn unimplemented() -> cublasStatus_t {
}
#[allow(non_snake_case)]
pub fn cublasGetStatusName(_status: cuda_types::cublas::cublasStatus_t) -> *const ::core::ffi::c_char {
pub fn cublasGetStatusName(
_status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub fn cublasGetStatusString(_status: cuda_types::cublas::cublasStatus_t) -> *const ::core::ffi::c_char {
pub fn cublasGetStatusString(
_status: cuda_types::cublas::cublasStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
@ -25,7 +29,6 @@ pub fn cublasXerbla(_srName: *const ::core::ffi::c_char, _info: ::core::ffi::c_i
todo!()
}
#[allow(non_snake_case)]
pub fn cublasGetCudartVersion() -> usize {
todo!()

View File

@ -28,10 +28,11 @@ macro_rules! implemented {
cuda_macros::cublas_function_declarations!(
unimplemented,
implemented <= [
cublasGetStatusName,
cublasGetStatusString,
cublasXerbla,
cublasGetCudartVersion
]
implemented
<= [
cublasGetStatusName,
cublasGetStatusString,
cublasXerbla,
cublasGetCudartVersion
]
);

View File

@ -28,11 +28,12 @@ macro_rules! implemented {
cuda_macros::cublaslt_function_declarations!(
unimplemented,
implemented <= [
cublasLtGetStatusName,
cublasLtGetStatusString,
cublasLtDisableCpuInstructionsSetMask,
cublasLtGetVersion,
cublasLtGetCudartVersion
]
implemented
<= [
cublasLtGetStatusName,
cublasLtGetStatusString,
cublasLtDisableCpuInstructionsSetMask,
cublasLtGetVersion,
cublasLtGetCudartVersion
]
);

View File

@ -28,11 +28,12 @@ macro_rules! implemented {
cuda_macros::cudnn9_function_declarations!(
unimplemented,
implemented <= [
cudnnGetVersion,
cudnnGetMaxDeviceVersion,
cudnnGetCudartVersion,
cudnnGetErrorString,
cudnnGetLastErrorString
]
implemented
<= [
cudnnGetVersion,
cudnnGetMaxDeviceVersion,
cudnnGetCudartVersion,
cudnnGetErrorString,
cudnnGetLastErrorString
]
);

View File

@ -420,13 +420,13 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301 {
[0] = context_local_storage_put(
context: cuda_types::cuda::CUcontext,
key: *mut std::ffi::c_void,
key: *mut std::ffi::c_void,
value: *mut std::ffi::c_void,
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
dtor_cb: Option<extern "system" fn(
cuda_types::cuda::CUcontext,
*mut std::ffi::c_void,
*mut std::ffi::c_void,
*mut std::ffi::c_void,
*mut std::ffi::c_void,
)>
) -> cuda_types::cuda::CUresult,
[1] = context_local_storage_delete(
@ -434,9 +434,9 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
key: *mut std::ffi::c_void
) -> cuda_types::cuda::CUresult,
[2] = context_local_storage_get(
value: *mut *mut std::ffi::c_void,
value: *mut *mut std::ffi::c_void,
cu_ctx: cuda_types::cuda::CUcontext,
key: *mut std::ffi::c_void
key: *mut std::ffi::c_void
) -> cuda_types::cuda::CUresult
}
}

View File

@ -13,6 +13,4 @@ macro_rules! unimplemented {
};
}
cuda_macros::cufft_function_declarations!(
unimplemented
);
cuda_macros::cufft_function_declarations!(unimplemented);

View File

@ -26,9 +26,5 @@ macro_rules! implemented_fn {
cuda_macros::nvml_function_declarations!(
unimplemented_fn,
implemented_fn <= [
nvmlErrorString,
nvmlInit_v2,
nvmlSystemGetDriverVersion
]
implemented_fn <= [nvmlErrorString, nvmlInit_v2, nvmlSystemGetDriverVersion]
);

View File

@ -28,12 +28,13 @@ macro_rules! implemented {
cuda_macros::cusparse_function_declarations!(
unimplemented,
implemented <= [
cusparseGetErrorName,
cusparseGetErrorString,
cusparseGetMatIndexBase,
cusparseGetMatDiagType,
cusparseGetMatFillMode,
cusparseGetMatType
]
implemented
<= [
cusparseGetErrorName,
cusparseGetErrorString,
cusparseGetMatIndexBase,
cusparseGetMatDiagType,
cusparseGetMatFillMode,
cusparseGetMatType
]
);