mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 06:47:46 +03:00
Check Rust formatting on pull requests (#451)
* Check Rust formatting on pull requests This should help us maintain consistent style, without having unrelated style changes in pull requests from running `rustfmt`. * cargo fmt non-generated files * Ignore generated files
This commit is contained in:
9
.github/workflows/pr_master.yml
vendored
9
.github/workflows/pr_master.yml
vendored
@ -11,6 +11,15 @@ env:
|
||||
ROCM_VERSION: "6.3.1"
|
||||
|
||||
jobs:
|
||||
formatting:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: Check Rust formatting
|
||||
uses: actions-rust-lang/rustfmt@v1
|
||||
build_linux:
|
||||
name: Build (Linux)
|
||||
runs-on: ubuntu-22.04
|
||||
|
815
comgr/src/lib.rs
815
comgr/src/lib.rs
@ -1,407 +1,408 @@
|
||||
use amd_comgr_sys::*;
|
||||
use std::{ffi::CStr, mem, ptr};
|
||||
|
||||
macro_rules! call_dispatch_arg {
|
||||
(2, $arg:ident) => {
|
||||
$arg.comgr2()
|
||||
};
|
||||
(2, $arg:tt) => {
|
||||
#[allow(unused_braces)]
|
||||
$arg
|
||||
};
|
||||
(3, $arg:ident) => {
|
||||
$arg.comgr3()
|
||||
};
|
||||
(3, $arg:tt) => {
|
||||
#[allow(unused_braces)]
|
||||
$arg
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! call_dispatch {
|
||||
($src:expr => $fn_:ident( $($arg:tt),+ )) => {
|
||||
match $src {
|
||||
Comgr::V2(this) => unsafe { this. $fn_(
|
||||
$(
|
||||
call_dispatch_arg!(2, $arg),
|
||||
)+
|
||||
) }?,
|
||||
Comgr::V3(this) => unsafe { this. $fn_(
|
||||
$(
|
||||
call_dispatch_arg!(3, $arg),
|
||||
)+
|
||||
) }?,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! comgr_owned {
|
||||
($name:ident, $comgr_type:ident, $ctor:ident, $dtor:ident) => {
|
||||
struct $name<'a> {
|
||||
handle: u64,
|
||||
comgr: &'a Comgr,
|
||||
}
|
||||
|
||||
impl<'a> $name<'a> {
|
||||
fn new(comgr: &'a Comgr) -> Result<Self, Error> {
|
||||
let handle = match comgr {
|
||||
Comgr::V2(comgr) => {
|
||||
let mut result = unsafe { mem::zeroed() };
|
||||
unsafe { comgr.$ctor(&mut result)? };
|
||||
result.handle
|
||||
}
|
||||
Comgr::V3(comgr) => {
|
||||
let mut result = unsafe { mem::zeroed() };
|
||||
unsafe { comgr.$ctor(&mut result)? };
|
||||
result.handle
|
||||
}
|
||||
};
|
||||
Ok(Self { handle, comgr })
|
||||
}
|
||||
|
||||
fn comgr2(&self) -> amd_comgr_sys::comgr2::$comgr_type {
|
||||
amd_comgr_sys::comgr2::$comgr_type {
|
||||
handle: self.handle,
|
||||
}
|
||||
}
|
||||
|
||||
fn comgr3(&self) -> amd_comgr_sys::comgr3::$comgr_type {
|
||||
amd_comgr_sys::comgr3::$comgr_type {
|
||||
handle: self.handle,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for $name<'a> {
|
||||
fn drop(&mut self) {
|
||||
match self.comgr {
|
||||
Comgr::V2(comgr) => {
|
||||
unsafe {
|
||||
comgr.$dtor(amd_comgr_sys::comgr2::$comgr_type {
|
||||
handle: self.handle,
|
||||
})
|
||||
}
|
||||
.ok();
|
||||
}
|
||||
Comgr::V3(comgr) => {
|
||||
unsafe {
|
||||
comgr.$dtor(amd_comgr_sys::comgr3::$comgr_type {
|
||||
handle: self.handle,
|
||||
})
|
||||
}
|
||||
.ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
comgr_owned!(
|
||||
ActionInfo,
|
||||
amd_comgr_action_info_t,
|
||||
amd_comgr_create_action_info,
|
||||
amd_comgr_destroy_action_info
|
||||
);
|
||||
|
||||
impl<'a> ActionInfo<'a> {
|
||||
fn set_isa_name(&self, isa: &CStr) -> Result<(), Error> {
|
||||
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
|
||||
full_isa.extend(isa.to_bytes_with_nul());
|
||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_isa_name(self, { full_isa.as_ptr().cast() }));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_language(&self, language: Language) -> Result<(), Error> {
|
||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_language(self, language));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_options<'b>(&self, options: impl Iterator<Item = &'b CStr>) -> Result<(), Error> {
|
||||
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
|
||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_option_list(self, { options.as_ptr().cast_mut() }, { options.len() }));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
comgr_owned!(
|
||||
DataSet,
|
||||
amd_comgr_data_set_t,
|
||||
amd_comgr_create_data_set,
|
||||
amd_comgr_destroy_data_set
|
||||
);
|
||||
|
||||
impl<'a> DataSet<'a> {
|
||||
fn add(&self, data: &Data) -> Result<(), Error> {
|
||||
call_dispatch!(self.comgr => amd_comgr_data_set_add(self, data));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_data(&self, kind: DataKind, index: usize) -> Result<Data, Error> {
|
||||
let mut handle = 0u64;
|
||||
call_dispatch!(self.comgr => amd_comgr_action_data_get_data(self, kind, { index }, { std::ptr::from_mut(&mut handle).cast() }));
|
||||
Ok(Data(handle))
|
||||
}
|
||||
}
|
||||
|
||||
struct Data(u64);
|
||||
|
||||
impl Data {
|
||||
fn new(comgr: &Comgr, kind: DataKind, name: &CStr, content: &[u8]) -> Result<Self, Error> {
|
||||
let mut handle = 0u64;
|
||||
call_dispatch!(comgr => amd_comgr_create_data(kind, { std::ptr::from_mut(&mut handle).cast() }));
|
||||
let data = Data(handle);
|
||||
call_dispatch!(comgr => amd_comgr_set_data_name(data, { name.as_ptr() }));
|
||||
call_dispatch!(comgr => amd_comgr_set_data(data, { content.len() }, { content.as_ptr().cast() }));
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
fn comgr2(&self) -> comgr2::amd_comgr_data_t {
|
||||
comgr2::amd_comgr_data_s { handle: self.0 }
|
||||
}
|
||||
|
||||
fn comgr3(&self) -> comgr3::amd_comgr_data_t {
|
||||
comgr3::amd_comgr_data_s { handle: self.0 }
|
||||
}
|
||||
|
||||
fn copy_content(&self, comgr: &Comgr) -> Result<Vec<u8>, Error> {
|
||||
let mut size = unsafe { mem::zeroed() };
|
||||
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { ptr::null_mut() }));
|
||||
let mut result: Vec<u8> = Vec::with_capacity(size);
|
||||
unsafe { result.set_len(size) };
|
||||
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { result.as_mut_ptr().cast() }));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compile_bitcode(
|
||||
comgr: &Comgr,
|
||||
gcn_arch: &CStr,
|
||||
main_buffer: &[u8],
|
||||
attributes_buffer: &[u8],
|
||||
ptx_impl: &[u8],
|
||||
) -> Result<Vec<u8>, Error> {
|
||||
let bitcode_data_set = DataSet::new(comgr)?;
|
||||
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
|
||||
bitcode_data_set.add(&main_bitcode_data)?;
|
||||
let attributes_bitcode_data = Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
|
||||
bitcode_data_set.add(&attributes_bitcode_data)?;
|
||||
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
|
||||
bitcode_data_set.add(&stdlib_bitcode_data)?;
|
||||
let linking_info = ActionInfo::new(comgr)?;
|
||||
let linked_data_set =
|
||||
comgr.do_action(ActionKind::LinkBcToBc, &linking_info, &bitcode_data_set)?;
|
||||
let compile_to_exec = ActionInfo::new(comgr)?;
|
||||
compile_to_exec.set_isa_name(gcn_arch)?;
|
||||
compile_to_exec.set_language(Language::LlvmIr)?;
|
||||
let common_options = [
|
||||
// This makes no sense, but it makes ockl linking work
|
||||
c"-Xclang",
|
||||
c"-mno-link-builtin-bitcode-postopt",
|
||||
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
|
||||
// and then fails to inline them
|
||||
c"-Xclang",
|
||||
c"-fdenormal-fp-math=dynamic",
|
||||
c"-O3",
|
||||
c"-mno-wavefrontsize64",
|
||||
c"-mcumode",
|
||||
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
|
||||
// c"-fsave-optimization-record=yaml",
|
||||
]
|
||||
.into_iter();
|
||||
let opt_options = if cfg!(debug_assertions) {
|
||||
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
|
||||
[c"-g", c"", c"", c"", c""]
|
||||
} else {
|
||||
[
|
||||
c"-g0",
|
||||
// default inlining threshold times 10
|
||||
c"-mllvm",
|
||||
c"-inline-threshold=2250",
|
||||
c"-mllvm",
|
||||
c"-inlinehint-threshold=3250",
|
||||
]
|
||||
};
|
||||
compile_to_exec.set_options(common_options.chain(opt_options))?;
|
||||
let exec_data_set = comgr.do_action(
|
||||
ActionKind::CompileSourceToExecutable,
|
||||
&compile_to_exec,
|
||||
&linked_data_set,
|
||||
)?;
|
||||
let executable = exec_data_set.get_data(DataKind::Executable, 0)?;
|
||||
executable.copy_content(comgr)
|
||||
}
|
||||
|
||||
pub enum Comgr {
|
||||
V2(amd_comgr_sys::comgr2::Comgr2),
|
||||
V3(amd_comgr_sys::comgr3::Comgr3),
|
||||
}
|
||||
|
||||
impl Comgr {
|
||||
pub fn new() -> Result<Self, Error> {
|
||||
unsafe { libloading::Library::new(os::COMGR3) }
|
||||
.and_then(|lib| {
|
||||
Ok(Comgr::V3(unsafe {
|
||||
amd_comgr_sys::comgr3::Comgr3::from_library(lib)?
|
||||
}))
|
||||
})
|
||||
.or_else(|_| {
|
||||
unsafe { libloading::Library::new(os::COMGR2) }.and_then(|lib| {
|
||||
Ok(if Self::is_broken_v2(&lib) {
|
||||
Comgr::V3(unsafe { amd_comgr_sys::comgr3::Comgr3::from_library(lib)? })
|
||||
} else {
|
||||
Comgr::V2(unsafe { amd_comgr_sys::comgr2::Comgr2::from_library(lib)? })
|
||||
})
|
||||
})
|
||||
})
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
// For reasons unknown, on AMD Adrenalin 25.5.1, AMD ships amd_comgr_2.dll that shows up as
|
||||
// version 2.9.0, but actually uses the 3.X ABI. This is our best effort to detect it.
|
||||
// Version 25.3.1 returns 2.8.0, which seem to be the last version that actually uses the 2 ABI
|
||||
fn is_broken_v2(lib: &libloading::Library) -> bool {
|
||||
if cfg!(not(windows)) {
|
||||
return false;
|
||||
}
|
||||
let amd_comgr_get_version = match unsafe {
|
||||
lib.get::<unsafe extern "C" fn(major: *mut usize, minor: *mut usize)>(
|
||||
b"amd_comgr_get_version\0",
|
||||
)
|
||||
} {
|
||||
Ok(symbol) => symbol,
|
||||
Err(_) => return false,
|
||||
};
|
||||
let mut major = 0;
|
||||
let mut minor = 0;
|
||||
unsafe { (amd_comgr_get_version)(&mut major, &mut minor) };
|
||||
(major, minor) >= (2, 9)
|
||||
}
|
||||
|
||||
fn do_action(
|
||||
&self,
|
||||
kind: ActionKind,
|
||||
action: &ActionInfo,
|
||||
data_set: &DataSet,
|
||||
) -> Result<DataSet, Error> {
|
||||
let result = DataSet::new(self)?;
|
||||
call_dispatch!(self => amd_comgr_do_action(kind, action, data_set, result));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error(pub ::std::num::NonZeroU32);
|
||||
|
||||
impl Error {
|
||||
#[doc = " A generic error has occurred."]
|
||||
pub const UNKNOWN: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
|
||||
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
|
||||
pub const INVALID_ARGUMENT: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
|
||||
#[doc = " Failed to allocate the necessary resources."]
|
||||
pub const OUT_OF_RESOURCES: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
|
||||
}
|
||||
|
||||
impl From<libloading::Error> for Error {
|
||||
fn from(_: libloading::Error) -> Self {
|
||||
Self::UNKNOWN
|
||||
}
|
||||
}
|
||||
|
||||
impl From<comgr2::amd_comgr_status_s> for Error {
|
||||
fn from(status: comgr2::amd_comgr_status_s) -> Self {
|
||||
Error(status.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<comgr3::amd_comgr_status_s> for Error {
|
||||
fn from(status: comgr3::amd_comgr_status_s) -> Self {
|
||||
Error(status.0)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_into {
|
||||
($self_type:ident, $to_type:ident, [$($from:ident => $to:ident),+]) => {
|
||||
#[derive(Copy, Clone)]
|
||||
#[allow(unused)]
|
||||
enum $self_type {
|
||||
$(
|
||||
$from,
|
||||
)+
|
||||
}
|
||||
|
||||
impl $self_type {
|
||||
fn comgr2(self) -> comgr2::$to_type {
|
||||
match self {
|
||||
$(
|
||||
Self:: $from => comgr2 :: $to_type :: $to,
|
||||
)+
|
||||
}
|
||||
}
|
||||
|
||||
fn comgr3(self) -> comgr3::$to_type {
|
||||
match self {
|
||||
$(
|
||||
Self:: $from => comgr3 :: $to_type :: $to,
|
||||
)+
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_into!(
|
||||
ActionKind,
|
||||
amd_comgr_action_kind_t,
|
||||
[
|
||||
LinkBcToBc => AMD_COMGR_ACTION_LINK_BC_TO_BC,
|
||||
CompileSourceToExecutable => AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE
|
||||
]
|
||||
);
|
||||
|
||||
impl_into!(
|
||||
DataKind,
|
||||
amd_comgr_data_kind_t,
|
||||
[
|
||||
Undef => AMD_COMGR_DATA_KIND_UNDEF,
|
||||
Source => AMD_COMGR_DATA_KIND_SOURCE,
|
||||
Include => AMD_COMGR_DATA_KIND_INCLUDE,
|
||||
PrecompiledHeader => AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER,
|
||||
Diagnostic => AMD_COMGR_DATA_KIND_DIAGNOSTIC,
|
||||
Log => AMD_COMGR_DATA_KIND_LOG,
|
||||
Bc => AMD_COMGR_DATA_KIND_BC,
|
||||
Relocatable => AMD_COMGR_DATA_KIND_RELOCATABLE,
|
||||
Executable => AMD_COMGR_DATA_KIND_EXECUTABLE,
|
||||
Bytes => AMD_COMGR_DATA_KIND_BYTES,
|
||||
Fatbin => AMD_COMGR_DATA_KIND_FATBIN,
|
||||
Ar => AMD_COMGR_DATA_KIND_AR,
|
||||
BcBundle => AMD_COMGR_DATA_KIND_BC_BUNDLE,
|
||||
ArBundle => AMD_COMGR_DATA_KIND_AR_BUNDLE,
|
||||
ObjBundle => AMD_COMGR_DATA_KIND_OBJ_BUNDLE
|
||||
|
||||
]
|
||||
);
|
||||
|
||||
impl_into!(
|
||||
Language,
|
||||
amd_comgr_language_t,
|
||||
[
|
||||
None => AMD_COMGR_LANGUAGE_NONE,
|
||||
OpenCl12 => AMD_COMGR_LANGUAGE_OPENCL_1_2,
|
||||
OpenCl20 => AMD_COMGR_LANGUAGE_OPENCL_2_0,
|
||||
Hip => AMD_COMGR_LANGUAGE_HIP,
|
||||
LlvmIr => AMD_COMGR_LANGUAGE_LLVM_IR
|
||||
]
|
||||
);
|
||||
|
||||
#[cfg(unix)]
|
||||
mod os {
|
||||
pub static COMGR3: &'static str = "libamd_comgr.so.3";
|
||||
pub static COMGR2: &'static str = "libamd_comgr.so.2";
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
mod os {
|
||||
pub static COMGR3: &'static str = "amd_comgr_3.dll";
|
||||
pub static COMGR2: &'static str = "amd_comgr_2.dll";
|
||||
}
|
||||
use amd_comgr_sys::*;
|
||||
use std::{ffi::CStr, mem, ptr};
|
||||
|
||||
macro_rules! call_dispatch_arg {
|
||||
(2, $arg:ident) => {
|
||||
$arg.comgr2()
|
||||
};
|
||||
(2, $arg:tt) => {
|
||||
#[allow(unused_braces)]
|
||||
$arg
|
||||
};
|
||||
(3, $arg:ident) => {
|
||||
$arg.comgr3()
|
||||
};
|
||||
(3, $arg:tt) => {
|
||||
#[allow(unused_braces)]
|
||||
$arg
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! call_dispatch {
|
||||
($src:expr => $fn_:ident( $($arg:tt),+ )) => {
|
||||
match $src {
|
||||
Comgr::V2(this) => unsafe { this. $fn_(
|
||||
$(
|
||||
call_dispatch_arg!(2, $arg),
|
||||
)+
|
||||
) }?,
|
||||
Comgr::V3(this) => unsafe { this. $fn_(
|
||||
$(
|
||||
call_dispatch_arg!(3, $arg),
|
||||
)+
|
||||
) }?,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! comgr_owned {
|
||||
($name:ident, $comgr_type:ident, $ctor:ident, $dtor:ident) => {
|
||||
struct $name<'a> {
|
||||
handle: u64,
|
||||
comgr: &'a Comgr,
|
||||
}
|
||||
|
||||
impl<'a> $name<'a> {
|
||||
fn new(comgr: &'a Comgr) -> Result<Self, Error> {
|
||||
let handle = match comgr {
|
||||
Comgr::V2(comgr) => {
|
||||
let mut result = unsafe { mem::zeroed() };
|
||||
unsafe { comgr.$ctor(&mut result)? };
|
||||
result.handle
|
||||
}
|
||||
Comgr::V3(comgr) => {
|
||||
let mut result = unsafe { mem::zeroed() };
|
||||
unsafe { comgr.$ctor(&mut result)? };
|
||||
result.handle
|
||||
}
|
||||
};
|
||||
Ok(Self { handle, comgr })
|
||||
}
|
||||
|
||||
fn comgr2(&self) -> amd_comgr_sys::comgr2::$comgr_type {
|
||||
amd_comgr_sys::comgr2::$comgr_type {
|
||||
handle: self.handle,
|
||||
}
|
||||
}
|
||||
|
||||
fn comgr3(&self) -> amd_comgr_sys::comgr3::$comgr_type {
|
||||
amd_comgr_sys::comgr3::$comgr_type {
|
||||
handle: self.handle,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for $name<'a> {
|
||||
fn drop(&mut self) {
|
||||
match self.comgr {
|
||||
Comgr::V2(comgr) => {
|
||||
unsafe {
|
||||
comgr.$dtor(amd_comgr_sys::comgr2::$comgr_type {
|
||||
handle: self.handle,
|
||||
})
|
||||
}
|
||||
.ok();
|
||||
}
|
||||
Comgr::V3(comgr) => {
|
||||
unsafe {
|
||||
comgr.$dtor(amd_comgr_sys::comgr3::$comgr_type {
|
||||
handle: self.handle,
|
||||
})
|
||||
}
|
||||
.ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
comgr_owned!(
|
||||
ActionInfo,
|
||||
amd_comgr_action_info_t,
|
||||
amd_comgr_create_action_info,
|
||||
amd_comgr_destroy_action_info
|
||||
);
|
||||
|
||||
impl<'a> ActionInfo<'a> {
|
||||
fn set_isa_name(&self, isa: &CStr) -> Result<(), Error> {
|
||||
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
|
||||
full_isa.extend(isa.to_bytes_with_nul());
|
||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_isa_name(self, { full_isa.as_ptr().cast() }));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_language(&self, language: Language) -> Result<(), Error> {
|
||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_language(self, language));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_options<'b>(&self, options: impl Iterator<Item = &'b CStr>) -> Result<(), Error> {
|
||||
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
|
||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_option_list(self, { options.as_ptr().cast_mut() }, { options.len() }));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
comgr_owned!(
|
||||
DataSet,
|
||||
amd_comgr_data_set_t,
|
||||
amd_comgr_create_data_set,
|
||||
amd_comgr_destroy_data_set
|
||||
);
|
||||
|
||||
impl<'a> DataSet<'a> {
|
||||
fn add(&self, data: &Data) -> Result<(), Error> {
|
||||
call_dispatch!(self.comgr => amd_comgr_data_set_add(self, data));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_data(&self, kind: DataKind, index: usize) -> Result<Data, Error> {
|
||||
let mut handle = 0u64;
|
||||
call_dispatch!(self.comgr => amd_comgr_action_data_get_data(self, kind, { index }, { std::ptr::from_mut(&mut handle).cast() }));
|
||||
Ok(Data(handle))
|
||||
}
|
||||
}
|
||||
|
||||
struct Data(u64);
|
||||
|
||||
impl Data {
|
||||
fn new(comgr: &Comgr, kind: DataKind, name: &CStr, content: &[u8]) -> Result<Self, Error> {
|
||||
let mut handle = 0u64;
|
||||
call_dispatch!(comgr => amd_comgr_create_data(kind, { std::ptr::from_mut(&mut handle).cast() }));
|
||||
let data = Data(handle);
|
||||
call_dispatch!(comgr => amd_comgr_set_data_name(data, { name.as_ptr() }));
|
||||
call_dispatch!(comgr => amd_comgr_set_data(data, { content.len() }, { content.as_ptr().cast() }));
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
fn comgr2(&self) -> comgr2::amd_comgr_data_t {
|
||||
comgr2::amd_comgr_data_s { handle: self.0 }
|
||||
}
|
||||
|
||||
fn comgr3(&self) -> comgr3::amd_comgr_data_t {
|
||||
comgr3::amd_comgr_data_s { handle: self.0 }
|
||||
}
|
||||
|
||||
fn copy_content(&self, comgr: &Comgr) -> Result<Vec<u8>, Error> {
|
||||
let mut size = unsafe { mem::zeroed() };
|
||||
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { ptr::null_mut() }));
|
||||
let mut result: Vec<u8> = Vec::with_capacity(size);
|
||||
unsafe { result.set_len(size) };
|
||||
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { result.as_mut_ptr().cast() }));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compile_bitcode(
|
||||
comgr: &Comgr,
|
||||
gcn_arch: &CStr,
|
||||
main_buffer: &[u8],
|
||||
attributes_buffer: &[u8],
|
||||
ptx_impl: &[u8],
|
||||
) -> Result<Vec<u8>, Error> {
|
||||
let bitcode_data_set = DataSet::new(comgr)?;
|
||||
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
|
||||
bitcode_data_set.add(&main_bitcode_data)?;
|
||||
let attributes_bitcode_data =
|
||||
Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
|
||||
bitcode_data_set.add(&attributes_bitcode_data)?;
|
||||
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
|
||||
bitcode_data_set.add(&stdlib_bitcode_data)?;
|
||||
let linking_info = ActionInfo::new(comgr)?;
|
||||
let linked_data_set =
|
||||
comgr.do_action(ActionKind::LinkBcToBc, &linking_info, &bitcode_data_set)?;
|
||||
let compile_to_exec = ActionInfo::new(comgr)?;
|
||||
compile_to_exec.set_isa_name(gcn_arch)?;
|
||||
compile_to_exec.set_language(Language::LlvmIr)?;
|
||||
let common_options = [
|
||||
// This makes no sense, but it makes ockl linking work
|
||||
c"-Xclang",
|
||||
c"-mno-link-builtin-bitcode-postopt",
|
||||
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
|
||||
// and then fails to inline them
|
||||
c"-Xclang",
|
||||
c"-fdenormal-fp-math=dynamic",
|
||||
c"-O3",
|
||||
c"-mno-wavefrontsize64",
|
||||
c"-mcumode",
|
||||
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
|
||||
// c"-fsave-optimization-record=yaml",
|
||||
]
|
||||
.into_iter();
|
||||
let opt_options = if cfg!(debug_assertions) {
|
||||
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
|
||||
[c"-g", c"", c"", c"", c""]
|
||||
} else {
|
||||
[
|
||||
c"-g0",
|
||||
// default inlining threshold times 10
|
||||
c"-mllvm",
|
||||
c"-inline-threshold=2250",
|
||||
c"-mllvm",
|
||||
c"-inlinehint-threshold=3250",
|
||||
]
|
||||
};
|
||||
compile_to_exec.set_options(common_options.chain(opt_options))?;
|
||||
let exec_data_set = comgr.do_action(
|
||||
ActionKind::CompileSourceToExecutable,
|
||||
&compile_to_exec,
|
||||
&linked_data_set,
|
||||
)?;
|
||||
let executable = exec_data_set.get_data(DataKind::Executable, 0)?;
|
||||
executable.copy_content(comgr)
|
||||
}
|
||||
|
||||
pub enum Comgr {
|
||||
V2(amd_comgr_sys::comgr2::Comgr2),
|
||||
V3(amd_comgr_sys::comgr3::Comgr3),
|
||||
}
|
||||
|
||||
impl Comgr {
|
||||
pub fn new() -> Result<Self, Error> {
|
||||
unsafe { libloading::Library::new(os::COMGR3) }
|
||||
.and_then(|lib| {
|
||||
Ok(Comgr::V3(unsafe {
|
||||
amd_comgr_sys::comgr3::Comgr3::from_library(lib)?
|
||||
}))
|
||||
})
|
||||
.or_else(|_| {
|
||||
unsafe { libloading::Library::new(os::COMGR2) }.and_then(|lib| {
|
||||
Ok(if Self::is_broken_v2(&lib) {
|
||||
Comgr::V3(unsafe { amd_comgr_sys::comgr3::Comgr3::from_library(lib)? })
|
||||
} else {
|
||||
Comgr::V2(unsafe { amd_comgr_sys::comgr2::Comgr2::from_library(lib)? })
|
||||
})
|
||||
})
|
||||
})
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
// For reasons unknown, on AMD Adrenalin 25.5.1, AMD ships amd_comgr_2.dll that shows up as
|
||||
// version 2.9.0, but actually uses the 3.X ABI. This is our best effort to detect it.
|
||||
// Version 25.3.1 returns 2.8.0, which seem to be the last version that actually uses the 2 ABI
|
||||
fn is_broken_v2(lib: &libloading::Library) -> bool {
|
||||
if cfg!(not(windows)) {
|
||||
return false;
|
||||
}
|
||||
let amd_comgr_get_version = match unsafe {
|
||||
lib.get::<unsafe extern "C" fn(major: *mut usize, minor: *mut usize)>(
|
||||
b"amd_comgr_get_version\0",
|
||||
)
|
||||
} {
|
||||
Ok(symbol) => symbol,
|
||||
Err(_) => return false,
|
||||
};
|
||||
let mut major = 0;
|
||||
let mut minor = 0;
|
||||
unsafe { (amd_comgr_get_version)(&mut major, &mut minor) };
|
||||
(major, minor) >= (2, 9)
|
||||
}
|
||||
|
||||
fn do_action(
|
||||
&self,
|
||||
kind: ActionKind,
|
||||
action: &ActionInfo,
|
||||
data_set: &DataSet,
|
||||
) -> Result<DataSet, Error> {
|
||||
let result = DataSet::new(self)?;
|
||||
call_dispatch!(self => amd_comgr_do_action(kind, action, data_set, result));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error(pub ::std::num::NonZeroU32);
|
||||
|
||||
impl Error {
|
||||
#[doc = " A generic error has occurred."]
|
||||
pub const UNKNOWN: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
|
||||
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
|
||||
pub const INVALID_ARGUMENT: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
|
||||
#[doc = " Failed to allocate the necessary resources."]
|
||||
pub const OUT_OF_RESOURCES: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
|
||||
}
|
||||
|
||||
impl From<libloading::Error> for Error {
|
||||
fn from(_: libloading::Error) -> Self {
|
||||
Self::UNKNOWN
|
||||
}
|
||||
}
|
||||
|
||||
impl From<comgr2::amd_comgr_status_s> for Error {
|
||||
fn from(status: comgr2::amd_comgr_status_s) -> Self {
|
||||
Error(status.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<comgr3::amd_comgr_status_s> for Error {
|
||||
fn from(status: comgr3::amd_comgr_status_s) -> Self {
|
||||
Error(status.0)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_into {
|
||||
($self_type:ident, $to_type:ident, [$($from:ident => $to:ident),+]) => {
|
||||
#[derive(Copy, Clone)]
|
||||
#[allow(unused)]
|
||||
enum $self_type {
|
||||
$(
|
||||
$from,
|
||||
)+
|
||||
}
|
||||
|
||||
impl $self_type {
|
||||
fn comgr2(self) -> comgr2::$to_type {
|
||||
match self {
|
||||
$(
|
||||
Self:: $from => comgr2 :: $to_type :: $to,
|
||||
)+
|
||||
}
|
||||
}
|
||||
|
||||
fn comgr3(self) -> comgr3::$to_type {
|
||||
match self {
|
||||
$(
|
||||
Self:: $from => comgr3 :: $to_type :: $to,
|
||||
)+
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_into!(
|
||||
ActionKind,
|
||||
amd_comgr_action_kind_t,
|
||||
[
|
||||
LinkBcToBc => AMD_COMGR_ACTION_LINK_BC_TO_BC,
|
||||
CompileSourceToExecutable => AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE
|
||||
]
|
||||
);
|
||||
|
||||
impl_into!(
|
||||
DataKind,
|
||||
amd_comgr_data_kind_t,
|
||||
[
|
||||
Undef => AMD_COMGR_DATA_KIND_UNDEF,
|
||||
Source => AMD_COMGR_DATA_KIND_SOURCE,
|
||||
Include => AMD_COMGR_DATA_KIND_INCLUDE,
|
||||
PrecompiledHeader => AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER,
|
||||
Diagnostic => AMD_COMGR_DATA_KIND_DIAGNOSTIC,
|
||||
Log => AMD_COMGR_DATA_KIND_LOG,
|
||||
Bc => AMD_COMGR_DATA_KIND_BC,
|
||||
Relocatable => AMD_COMGR_DATA_KIND_RELOCATABLE,
|
||||
Executable => AMD_COMGR_DATA_KIND_EXECUTABLE,
|
||||
Bytes => AMD_COMGR_DATA_KIND_BYTES,
|
||||
Fatbin => AMD_COMGR_DATA_KIND_FATBIN,
|
||||
Ar => AMD_COMGR_DATA_KIND_AR,
|
||||
BcBundle => AMD_COMGR_DATA_KIND_BC_BUNDLE,
|
||||
ArBundle => AMD_COMGR_DATA_KIND_AR_BUNDLE,
|
||||
ObjBundle => AMD_COMGR_DATA_KIND_OBJ_BUNDLE
|
||||
|
||||
]
|
||||
);
|
||||
|
||||
impl_into!(
|
||||
Language,
|
||||
amd_comgr_language_t,
|
||||
[
|
||||
None => AMD_COMGR_LANGUAGE_NONE,
|
||||
OpenCl12 => AMD_COMGR_LANGUAGE_OPENCL_1_2,
|
||||
OpenCl20 => AMD_COMGR_LANGUAGE_OPENCL_2_0,
|
||||
Hip => AMD_COMGR_LANGUAGE_HIP,
|
||||
LlvmIr => AMD_COMGR_LANGUAGE_LLVM_IR
|
||||
]
|
||||
);
|
||||
|
||||
#[cfg(unix)]
|
||||
mod os {
|
||||
pub static COMGR3: &'static str = "libamd_comgr.so.3";
|
||||
pub static COMGR2: &'static str = "libamd_comgr.so.2";
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
mod os {
|
||||
pub static COMGR3: &'static str = "amd_comgr_3.dll";
|
||||
pub static COMGR2: &'static str = "amd_comgr_2.dll";
|
||||
}
|
||||
|
1
cuda_macros/.rustfmt.toml
Normal file
1
cuda_macros/.rustfmt.toml
Normal file
@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
1
cuda_types/.rustfmt.toml
Normal file
1
cuda_types/.rustfmt.toml
Normal file
@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
@ -77,21 +77,22 @@ impl<'a> Fatbin<'a> {
|
||||
|
||||
pub fn get_submodules(&self) -> Result<FatbinIter<'a>, FatbinError> {
|
||||
match self.wrapper.version {
|
||||
FatbincWrapper::VERSION_V2 =>
|
||||
Ok(FatbinIter::V2(FatbinSubmoduleIterator {
|
||||
fatbins: self.wrapper.filename_or_fatbins as *const *const std::ffi::c_void,
|
||||
_phantom: std::marker::PhantomData,
|
||||
})),
|
||||
FatbincWrapper::VERSION_V2 => Ok(FatbinIter::V2(FatbinSubmoduleIterator {
|
||||
fatbins: self.wrapper.filename_or_fatbins as *const *const std::ffi::c_void,
|
||||
_phantom: std::marker::PhantomData,
|
||||
})),
|
||||
FatbincWrapper::VERSION_V1 => {
|
||||
let header = parse_fatbin_header(&self.wrapper.data)
|
||||
.map_err(FatbinError::ParseFailure)?;
|
||||
let header =
|
||||
parse_fatbin_header(&self.wrapper.data).map_err(FatbinError::ParseFailure)?;
|
||||
Ok(FatbinIter::V1(Some(FatbinSubmodule::new(header))))
|
||||
}
|
||||
version => Err(FatbinError::ParseFailure(ParseError::UnexpectedBinaryField{
|
||||
field_name: "FATBINC_VERSION",
|
||||
observed: version,
|
||||
expected: [FatbincWrapper::VERSION_V1, FatbincWrapper::VERSION_V2].into(),
|
||||
})),
|
||||
version => Err(FatbinError::ParseFailure(
|
||||
ParseError::UnexpectedBinaryField {
|
||||
field_name: "FATBINC_VERSION",
|
||||
observed: version,
|
||||
expected: [FatbincWrapper::VERSION_V1, FatbincWrapper::VERSION_V2].into(),
|
||||
},
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -176,7 +177,6 @@ impl<'a> FatbinFile<'a> {
|
||||
unsafe { self.get_payload().to_vec() }
|
||||
};
|
||||
|
||||
|
||||
while payload.last() == Some(&0) {
|
||||
// remove trailing zeros
|
||||
payload.pop();
|
||||
|
@ -259,12 +259,12 @@ dark_api! {
|
||||
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
|
||||
[0] = context_local_storage_put(
|
||||
context: cuda_types::cuda::CUcontext,
|
||||
key: *mut std::ffi::c_void,
|
||||
value: *mut std::ffi::c_void,
|
||||
key: *mut std::ffi::c_void,
|
||||
value: *mut std::ffi::c_void,
|
||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||
dtor_cb: Option<extern "system" fn(
|
||||
cuda_types::cuda::CUcontext,
|
||||
*mut std::ffi::c_void,
|
||||
*mut std::ffi::c_void,
|
||||
*mut std::ffi::c_void,
|
||||
)>
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
|
2
ext/amd_comgr-sys/src/lib.rs
vendored
2
ext/amd_comgr-sys/src/lib.rs
vendored
@ -1,4 +1,4 @@
|
||||
#[allow(warnings)]
|
||||
pub mod comgr2;
|
||||
#[allow(warnings)]
|
||||
pub mod comgr3;
|
||||
pub mod comgr3;
|
||||
|
1
ext/hip_runtime-sys/.rustfmt.toml
vendored
Normal file
1
ext/hip_runtime-sys/.rustfmt.toml
vendored
Normal file
@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
1
ext/rocblas-sys/.rustfmt.toml
vendored
Normal file
1
ext/rocblas-sys/.rustfmt.toml
vendored
Normal file
@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
1
format/.rustfmt.toml
Normal file
1
format/.rustfmt.toml
Normal file
@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
@ -37,4 +37,4 @@ impl CudaDisplay for FatbinHeader {
|
||||
CudaDisplay::write(&self.files_size, "", 0, writer)?;
|
||||
writer.write_all(b" }")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
2635
format/src/lib.rs
2635
format/src/lib.rs
File diff suppressed because it is too large
Load Diff
@ -4,4 +4,3 @@ mod test;
|
||||
|
||||
pub use pass::to_llvm_module;
|
||||
pub use pass::Attributes;
|
||||
|
||||
|
@ -1,10 +1,13 @@
|
||||
use std::ffi::CStr;
|
||||
|
||||
use super::*;
|
||||
use super::super::*;
|
||||
use llvm_zluda::{core::*};
|
||||
use super::*;
|
||||
use llvm_zluda::core::*;
|
||||
|
||||
pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Module, TranslateError> {
|
||||
pub(crate) fn run(
|
||||
context: &Context,
|
||||
attributes: Attributes,
|
||||
) -> Result<llvm::Module, TranslateError> {
|
||||
let module = llvm::Module::new(context, LLVM_UNNAMED);
|
||||
|
||||
emit_attribute(context, &module, "clock_rate", attributes.clock_rate)?;
|
||||
@ -16,7 +19,12 @@ pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Mod
|
||||
Ok(module)
|
||||
}
|
||||
|
||||
fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribute: u32) -> Result<(), TranslateError> {
|
||||
fn emit_attribute(
|
||||
context: &Context,
|
||||
module: &llvm::Module,
|
||||
name: &str,
|
||||
attribute: u32,
|
||||
) -> Result<(), TranslateError> {
|
||||
let name = format!("{}attribute_{}\0", ZLUDA_PTX_PREFIX, name).to_ascii_uppercase();
|
||||
let name = unsafe { CStr::from_bytes_with_nul_unchecked(name.as_bytes()) };
|
||||
let attribute_type = get_scalar_type(context.get(), ast::ScalarType::U32);
|
||||
@ -31,4 +39,4 @@ fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribut
|
||||
unsafe { LLVMSetInitializer(global, LLVMConstInt(attribute_type, attribute as u64, 0)) };
|
||||
unsafe { LLVMSetGlobalConstant(global, 1) };
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
pub(super) mod emit;
|
||||
pub(super) mod attributes;
|
||||
pub(super) mod emit;
|
||||
|
||||
use std::ffi::CStr;
|
||||
use std::ops::Deref;
|
||||
@ -44,9 +44,7 @@ pub struct Module(LLVMModuleRef);
|
||||
|
||||
impl Module {
|
||||
fn new(ctx: &Context, name: &CStr) -> Self {
|
||||
Self(
|
||||
unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) },
|
||||
)
|
||||
Self(unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) })
|
||||
}
|
||||
|
||||
fn get(&self) -> LLVMModuleRef {
|
||||
|
1929
ptx/src/pass/mod.rs
1929
ptx/src/pass/mod.rs
File diff suppressed because it is too large
Load Diff
@ -21,7 +21,9 @@ pub(crate) fn run(
|
||||
for directive in directives.iter_mut() {
|
||||
let (body_ref, is_kernel) = match directive {
|
||||
Directive2::Method(Function2 {
|
||||
body: Some(body), is_kernel, ..
|
||||
body: Some(body),
|
||||
is_kernel,
|
||||
..
|
||||
}) => (body, *is_kernel),
|
||||
_ => continue,
|
||||
};
|
||||
|
@ -9,7 +9,9 @@ fn parse_and_assert(ptx_text: &str) {
|
||||
|
||||
fn compile_and_assert(ptx_text: &str) -> Result<(), TranslateError> {
|
||||
let ast = ast::parse_module_checked(ptx_text).unwrap();
|
||||
let attributes = pass::Attributes { clock_rate: 2124000 };
|
||||
let attributes = pass::Attributes {
|
||||
clock_rate: 2124000,
|
||||
};
|
||||
crate::to_llvm_module(ast, attributes)?;
|
||||
Ok(())
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -3581,7 +3581,7 @@ derive_parser!(
|
||||
state.errors.push(PtxError::SyntaxError);
|
||||
CpAsyncCpSize::Bytes4
|
||||
});
|
||||
|
||||
|
||||
let src_size = src_size
|
||||
.and_then(|op| op.as_immediate())
|
||||
.and_then(|imm| imm.as_u64());
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
use super::{context, driver};
|
||||
use cuda_types::cuda::*;
|
||||
use hip_runtime_sys::*;
|
||||
use std::{mem, ptr};
|
||||
use super::{driver, context};
|
||||
|
||||
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
||||
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
||||
@ -462,22 +462,21 @@ fn clamp_usize(x: usize) -> i32 {
|
||||
usize::min(x, i32::MAX as usize) as i32
|
||||
}
|
||||
|
||||
pub(crate) fn get_primary_context(hip_dev: hipDevice_t) -> Result<(&'static context::Context, CUcontext), CUerror> {
|
||||
pub(crate) fn get_primary_context(
|
||||
hip_dev: hipDevice_t,
|
||||
) -> Result<(&'static context::Context, CUcontext), CUerror> {
|
||||
let dev: &'static driver::Device = driver::device(hip_dev)?;
|
||||
Ok(dev.primary_context())
|
||||
}
|
||||
|
||||
pub(crate) fn primary_context_retain(
|
||||
pctx: &mut CUcontext,
|
||||
hip_dev: hipDevice_t,
|
||||
) -> CUresult {
|
||||
pub(crate) fn primary_context_retain(pctx: &mut CUcontext, hip_dev: hipDevice_t) -> CUresult {
|
||||
let (ctx, cu_ctx) = get_primary_context(hip_dev)?;
|
||||
|
||||
|
||||
ctx.with_state_mut(|state: &mut context::ContextState| {
|
||||
state.ref_count += 1;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
|
||||
*pctx = cu_ctx;
|
||||
Ok(())
|
||||
}
|
||||
@ -497,8 +496,6 @@ pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> CUresult {
|
||||
|
||||
pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
|
||||
let (ctx, _) = get_primary_context(hip_dev)?;
|
||||
ctx.with_state_mut(|state| {
|
||||
state.reset()
|
||||
})?;
|
||||
ctx.with_state_mut(|state| state.reset())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -38,10 +38,7 @@ pub(crate) unsafe fn unload(library: CUlibrary) -> CUresult {
|
||||
super::drop_checked::<Library>(library)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn get_module(
|
||||
out: &mut CUmodule,
|
||||
library: &Library,
|
||||
) -> CUresult {
|
||||
*out = module::Module{base: library.base}.wrap();
|
||||
pub(crate) unsafe fn get_module(out: &mut CUmodule, library: &Library) -> CUresult {
|
||||
*out = module::Module { base: library.base }.wrap();
|
||||
Ok(())
|
||||
}
|
||||
|
@ -68,7 +68,9 @@ pub(crate) fn load_hip_module(image: *const std::ffi::c_void) -> Result<hipModul
|
||||
unsafe { hipCtxGetDevice(&mut dev) }?;
|
||||
let mut props = unsafe { mem::zeroed() };
|
||||
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
|
||||
let attributes = ptx::Attributes { clock_rate: props.clockRate as u32 };
|
||||
let attributes = ptx::Attributes {
|
||||
clock_rate: props.clockRate as u32,
|
||||
};
|
||||
let llvm_module = ptx::to_llvm_module(ast, attributes).map_err(|_| CUerror::UNKNOWN)?;
|
||||
let elf_module = comgr::compile_bitcode(
|
||||
&global_state.comgr,
|
||||
@ -91,7 +93,6 @@ pub(crate) fn load_data(module: &mut CUmodule, image: &std::ffi::c_void) -> CUre
|
||||
|
||||
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
||||
super::drop_checked::<Module>(hmod)
|
||||
|
||||
}
|
||||
|
||||
pub(crate) fn get_function(
|
||||
|
116
zluda/src/lib.rs
116
zluda/src/lib.rs
@ -1,11 +1,10 @@
|
||||
use cuda_types::cuda::CUerror;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
|
||||
pub(crate) mod r#impl;
|
||||
#[cfg_attr(windows, path = "os_win.rs")]
|
||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||
mod os;
|
||||
pub(crate) mod r#impl;
|
||||
|
||||
static INITIALIZED: AtomicBool = AtomicBool::new(true);
|
||||
pub(crate) fn initialized() -> bool {
|
||||
@ -66,61 +65,60 @@ macro_rules! implemented_in_function {
|
||||
|
||||
cuda_macros::cuda_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cuCtxCreate_v2,
|
||||
cuCtxDestroy_v2,
|
||||
cuCtxGetLimit,
|
||||
cuCtxSetCurrent,
|
||||
cuCtxGetCurrent,
|
||||
cuCtxGetDevice,
|
||||
cuCtxSetLimit,
|
||||
cuCtxSynchronize,
|
||||
cuCtxPushCurrent,
|
||||
cuCtxPushCurrent_v2,
|
||||
cuCtxPopCurrent,
|
||||
cuCtxPopCurrent_v2,
|
||||
cuDeviceComputeCapability,
|
||||
cuDeviceGet,
|
||||
cuDeviceGetAttribute,
|
||||
cuDeviceGetCount,
|
||||
cuDeviceGetLuid,
|
||||
cuDeviceGetName,
|
||||
cuDeviceGetProperties,
|
||||
cuDeviceGetUuid,
|
||||
cuDeviceGetUuid_v2,
|
||||
cuDevicePrimaryCtxRelease,
|
||||
cuDevicePrimaryCtxRetain,
|
||||
cuDevicePrimaryCtxReset,
|
||||
cuDeviceTotalMem_v2,
|
||||
cuDriverGetVersion,
|
||||
cuFuncGetAttribute,
|
||||
cuGetExportTable,
|
||||
cuGetProcAddress,
|
||||
cuGetProcAddress_v2,
|
||||
cuInit,
|
||||
cuLibraryLoadData,
|
||||
cuLibraryGetModule,
|
||||
cuLibraryUnload,
|
||||
cuMemAlloc_v2,
|
||||
cuMemFree_v2,
|
||||
cuMemHostAlloc,
|
||||
cuMemFreeHost,
|
||||
cuMemGetAddressRange_v2,
|
||||
cuMemGetInfo_v2,
|
||||
cuMemcpyDtoH_v2,
|
||||
cuMemcpyHtoD_v2,
|
||||
cuMemsetD32_v2,
|
||||
cuMemsetD8_v2,
|
||||
cuModuleGetFunction,
|
||||
cuModuleGetLoadingMode,
|
||||
cuModuleLoadData,
|
||||
cuModuleUnload,
|
||||
cuPointerGetAttribute,
|
||||
cuStreamSynchronize,
|
||||
cuProfilerStart,
|
||||
cuProfilerStop,
|
||||
],
|
||||
implemented_in_function <= [
|
||||
cuLaunchKernel,
|
||||
]
|
||||
implemented
|
||||
<= [
|
||||
cuCtxCreate_v2,
|
||||
cuCtxDestroy_v2,
|
||||
cuCtxGetLimit,
|
||||
cuCtxSetCurrent,
|
||||
cuCtxGetCurrent,
|
||||
cuCtxGetDevice,
|
||||
cuCtxSetLimit,
|
||||
cuCtxSynchronize,
|
||||
cuCtxPushCurrent,
|
||||
cuCtxPushCurrent_v2,
|
||||
cuCtxPopCurrent,
|
||||
cuCtxPopCurrent_v2,
|
||||
cuDeviceComputeCapability,
|
||||
cuDeviceGet,
|
||||
cuDeviceGetAttribute,
|
||||
cuDeviceGetCount,
|
||||
cuDeviceGetLuid,
|
||||
cuDeviceGetName,
|
||||
cuDeviceGetProperties,
|
||||
cuDeviceGetUuid,
|
||||
cuDeviceGetUuid_v2,
|
||||
cuDevicePrimaryCtxRelease,
|
||||
cuDevicePrimaryCtxRetain,
|
||||
cuDevicePrimaryCtxReset,
|
||||
cuDeviceTotalMem_v2,
|
||||
cuDriverGetVersion,
|
||||
cuFuncGetAttribute,
|
||||
cuGetExportTable,
|
||||
cuGetProcAddress,
|
||||
cuGetProcAddress_v2,
|
||||
cuInit,
|
||||
cuLibraryLoadData,
|
||||
cuLibraryGetModule,
|
||||
cuLibraryUnload,
|
||||
cuMemAlloc_v2,
|
||||
cuMemFree_v2,
|
||||
cuMemHostAlloc,
|
||||
cuMemFreeHost,
|
||||
cuMemGetAddressRange_v2,
|
||||
cuMemGetInfo_v2,
|
||||
cuMemcpyDtoH_v2,
|
||||
cuMemcpyHtoD_v2,
|
||||
cuMemsetD32_v2,
|
||||
cuMemsetD8_v2,
|
||||
cuModuleGetFunction,
|
||||
cuModuleGetLoadingMode,
|
||||
cuModuleLoadData,
|
||||
cuModuleUnload,
|
||||
cuPointerGetAttribute,
|
||||
cuStreamSynchronize,
|
||||
cuProfilerStart,
|
||||
cuProfilerStop,
|
||||
],
|
||||
implemented_in_function <= [cuLaunchKernel,]
|
||||
);
|
||||
|
@ -0,0 +1 @@
|
||||
|
@ -10,15 +10,11 @@ pub(crate) fn unimplemented() -> cublasStatus_t {
|
||||
cublasStatus_t::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) fn get_status_name(
|
||||
_status: cublasStatus_t,
|
||||
) -> *const ::core::ffi::c_char {
|
||||
pub(crate) fn get_status_name(_status: cublasStatus_t) -> *const ::core::ffi::c_char {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) fn get_status_string(
|
||||
_status: cublasStatus_t,
|
||||
) -> *const ::core::ffi::c_char {
|
||||
pub(crate) fn get_status_string(_status: cublasStatus_t) -> *const ::core::ffi::c_char {
|
||||
todo!()
|
||||
}
|
||||
|
||||
|
@ -28,10 +28,11 @@ macro_rules! implemented {
|
||||
|
||||
cuda_macros::cublas_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cublasGetStatusName,
|
||||
cublasGetStatusString,
|
||||
cublasXerbla,
|
||||
cublasGetCudartVersion,
|
||||
]
|
||||
implemented
|
||||
<= [
|
||||
cublasGetStatusName,
|
||||
cublasGetStatusString,
|
||||
cublasXerbla,
|
||||
cublasGetCudartVersion
|
||||
]
|
||||
);
|
||||
|
@ -31,8 +31,6 @@ pub(crate) fn get_cudart_version() -> usize {
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub(crate) fn disable_cpu_instructions_set_mask(
|
||||
_mask: ::core::ffi::c_uint,
|
||||
) -> ::core::ffi::c_uint {
|
||||
pub(crate) fn disable_cpu_instructions_set_mask(_mask: ::core::ffi::c_uint) -> ::core::ffi::c_uint {
|
||||
todo!()
|
||||
}
|
||||
|
@ -28,11 +28,12 @@ macro_rules! implemented {
|
||||
|
||||
cuda_macros::cublaslt_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cublasLtGetStatusName,
|
||||
cublasLtGetStatusString,
|
||||
cublasLtDisableCpuInstructionsSetMask,
|
||||
cublasLtGetVersion,
|
||||
cublasLtGetCudartVersion
|
||||
]
|
||||
implemented
|
||||
<= [
|
||||
cublasLtGetStatusName,
|
||||
cublasLtGetStatusString,
|
||||
cublasLtDisableCpuInstructionsSetMask,
|
||||
cublasLtGetVersion,
|
||||
cublasLtGetCudartVersion
|
||||
]
|
||||
);
|
||||
|
@ -28,11 +28,12 @@ macro_rules! implemented {
|
||||
|
||||
cuda_macros::cudnn9_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cudnnGetVersion,
|
||||
cudnnGetMaxDeviceVersion,
|
||||
cudnnGetCudartVersion,
|
||||
cudnnGetErrorString,
|
||||
cudnnGetLastErrorString
|
||||
]
|
||||
implemented
|
||||
<= [
|
||||
cudnnGetVersion,
|
||||
cudnnGetMaxDeviceVersion,
|
||||
cudnnGetCudartVersion,
|
||||
cudnnGetErrorString,
|
||||
cudnnGetLastErrorString
|
||||
]
|
||||
);
|
||||
|
@ -420,13 +420,13 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
|
||||
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301 {
|
||||
[0] = context_local_storage_put(
|
||||
context: cuda_types::cuda::CUcontext,
|
||||
key: *mut std::ffi::c_void,
|
||||
key: *mut std::ffi::c_void,
|
||||
value: *mut std::ffi::c_void,
|
||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||
dtor_cb: Option<extern "system" fn(
|
||||
cuda_types::cuda::CUcontext,
|
||||
*mut std::ffi::c_void,
|
||||
*mut std::ffi::c_void,
|
||||
*mut std::ffi::c_void,
|
||||
*mut std::ffi::c_void,
|
||||
)>
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[1] = context_local_storage_delete(
|
||||
@ -434,9 +434,9 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
|
||||
key: *mut std::ffi::c_void
|
||||
) -> cuda_types::cuda::CUresult,
|
||||
[2] = context_local_storage_get(
|
||||
value: *mut *mut std::ffi::c_void,
|
||||
value: *mut *mut std::ffi::c_void,
|
||||
cu_ctx: cuda_types::cuda::CUcontext,
|
||||
key: *mut std::ffi::c_void
|
||||
key: *mut std::ffi::c_void
|
||||
) -> cuda_types::cuda::CUresult
|
||||
}
|
||||
}
|
||||
|
@ -13,6 +13,4 @@ macro_rules! unimplemented {
|
||||
};
|
||||
}
|
||||
|
||||
cuda_macros::cufft_function_declarations!(
|
||||
unimplemented
|
||||
);
|
||||
cuda_macros::cufft_function_declarations!(unimplemented);
|
||||
|
@ -1,41 +1,39 @@
|
||||
use cuda_types::nvml::*;
|
||||
use std::{ffi::CStr, ptr};
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
nvmlReturn_t::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) fn error_string(
|
||||
_result: cuda_types::nvml::nvmlReturn_t,
|
||||
) -> *const ::core::ffi::c_char {
|
||||
c"".as_ptr()
|
||||
}
|
||||
|
||||
pub(crate) fn init_v2() -> cuda_types::nvml::nvmlReturn_t {
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
||||
const VERSION: &'static CStr = c"550.77";
|
||||
|
||||
pub(crate) fn system_get_driver_version(
|
||||
result: *mut ::core::ffi::c_char,
|
||||
length: ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t {
|
||||
if result == ptr::null_mut() {
|
||||
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
let version = VERSION.to_bytes_with_nul();
|
||||
let copy_length = usize::min(length as usize, version.len());
|
||||
let slice = unsafe { std::slice::from_raw_parts_mut(result.cast(), copy_length) };
|
||||
slice.copy_from_slice(&version[..copy_length]);
|
||||
if let Some(null) = slice.last_mut() {
|
||||
*null = 0;
|
||||
}
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
use cuda_types::nvml::*;
|
||||
use std::{ffi::CStr, ptr};
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||
nvmlReturn_t::ERROR_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
pub(crate) fn error_string(_result: cuda_types::nvml::nvmlReturn_t) -> *const ::core::ffi::c_char {
|
||||
c"".as_ptr()
|
||||
}
|
||||
|
||||
pub(crate) fn init_v2() -> cuda_types::nvml::nvmlReturn_t {
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
||||
const VERSION: &'static CStr = c"550.77";
|
||||
|
||||
pub(crate) fn system_get_driver_version(
|
||||
result: *mut ::core::ffi::c_char,
|
||||
length: ::core::ffi::c_uint,
|
||||
) -> cuda_types::nvml::nvmlReturn_t {
|
||||
if result == ptr::null_mut() {
|
||||
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
let version = VERSION.to_bytes_with_nul();
|
||||
let copy_length = usize::min(length as usize, version.len());
|
||||
let slice = unsafe { std::slice::from_raw_parts_mut(result.cast(), copy_length) };
|
||||
slice.copy_from_slice(&version[..copy_length]);
|
||||
if let Some(null) = slice.last_mut() {
|
||||
*null = 0;
|
||||
}
|
||||
nvmlReturn_t::SUCCESS
|
||||
}
|
||||
|
@ -26,9 +26,5 @@ macro_rules! implemented_fn {
|
||||
|
||||
cuda_macros::nvml_function_declarations!(
|
||||
unimplemented_fn,
|
||||
implemented_fn <= [
|
||||
nvmlErrorString,
|
||||
nvmlInit_v2,
|
||||
nvmlSystemGetDriverVersion
|
||||
]
|
||||
implemented_fn <= [nvmlErrorString, nvmlInit_v2, nvmlSystemGetDriverVersion]
|
||||
);
|
||||
|
@ -28,12 +28,13 @@ macro_rules! implemented {
|
||||
|
||||
cuda_macros::cusparse_function_declarations!(
|
||||
unimplemented,
|
||||
implemented <= [
|
||||
cusparseGetErrorName,
|
||||
cusparseGetErrorString,
|
||||
cusparseGetMatIndexBase,
|
||||
cusparseGetMatDiagType,
|
||||
cusparseGetMatFillMode,
|
||||
cusparseGetMatType
|
||||
]
|
||||
implemented
|
||||
<= [
|
||||
cusparseGetErrorName,
|
||||
cusparseGetErrorString,
|
||||
cusparseGetMatIndexBase,
|
||||
cusparseGetMatDiagType,
|
||||
cusparseGetMatFillMode,
|
||||
cusparseGetMatType
|
||||
]
|
||||
);
|
||||
|
Reference in New Issue
Block a user