mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 14:57:43 +03:00
Check Rust formatting on pull requests (#451)
* Check Rust formatting on pull requests This should help us maintain consistent style, without having unrelated style changes in pull requests from running `rustfmt`. * cargo fmt non-generated files * Ignore generated files
This commit is contained in:
9
.github/workflows/pr_master.yml
vendored
9
.github/workflows/pr_master.yml
vendored
@ -11,6 +11,15 @@ env:
|
|||||||
ROCM_VERSION: "6.3.1"
|
ROCM_VERSION: "6.3.1"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
formatting:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||||
|
with:
|
||||||
|
components: rustfmt
|
||||||
|
- name: Check Rust formatting
|
||||||
|
uses: actions-rust-lang/rustfmt@v1
|
||||||
build_linux:
|
build_linux:
|
||||||
name: Build (Linux)
|
name: Build (Linux)
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
815
comgr/src/lib.rs
815
comgr/src/lib.rs
@ -1,407 +1,408 @@
|
|||||||
use amd_comgr_sys::*;
|
use amd_comgr_sys::*;
|
||||||
use std::{ffi::CStr, mem, ptr};
|
use std::{ffi::CStr, mem, ptr};
|
||||||
|
|
||||||
macro_rules! call_dispatch_arg {
|
macro_rules! call_dispatch_arg {
|
||||||
(2, $arg:ident) => {
|
(2, $arg:ident) => {
|
||||||
$arg.comgr2()
|
$arg.comgr2()
|
||||||
};
|
};
|
||||||
(2, $arg:tt) => {
|
(2, $arg:tt) => {
|
||||||
#[allow(unused_braces)]
|
#[allow(unused_braces)]
|
||||||
$arg
|
$arg
|
||||||
};
|
};
|
||||||
(3, $arg:ident) => {
|
(3, $arg:ident) => {
|
||||||
$arg.comgr3()
|
$arg.comgr3()
|
||||||
};
|
};
|
||||||
(3, $arg:tt) => {
|
(3, $arg:tt) => {
|
||||||
#[allow(unused_braces)]
|
#[allow(unused_braces)]
|
||||||
$arg
|
$arg
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! call_dispatch {
|
macro_rules! call_dispatch {
|
||||||
($src:expr => $fn_:ident( $($arg:tt),+ )) => {
|
($src:expr => $fn_:ident( $($arg:tt),+ )) => {
|
||||||
match $src {
|
match $src {
|
||||||
Comgr::V2(this) => unsafe { this. $fn_(
|
Comgr::V2(this) => unsafe { this. $fn_(
|
||||||
$(
|
$(
|
||||||
call_dispatch_arg!(2, $arg),
|
call_dispatch_arg!(2, $arg),
|
||||||
)+
|
)+
|
||||||
) }?,
|
) }?,
|
||||||
Comgr::V3(this) => unsafe { this. $fn_(
|
Comgr::V3(this) => unsafe { this. $fn_(
|
||||||
$(
|
$(
|
||||||
call_dispatch_arg!(3, $arg),
|
call_dispatch_arg!(3, $arg),
|
||||||
)+
|
)+
|
||||||
) }?,
|
) }?,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! comgr_owned {
|
macro_rules! comgr_owned {
|
||||||
($name:ident, $comgr_type:ident, $ctor:ident, $dtor:ident) => {
|
($name:ident, $comgr_type:ident, $ctor:ident, $dtor:ident) => {
|
||||||
struct $name<'a> {
|
struct $name<'a> {
|
||||||
handle: u64,
|
handle: u64,
|
||||||
comgr: &'a Comgr,
|
comgr: &'a Comgr,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> $name<'a> {
|
impl<'a> $name<'a> {
|
||||||
fn new(comgr: &'a Comgr) -> Result<Self, Error> {
|
fn new(comgr: &'a Comgr) -> Result<Self, Error> {
|
||||||
let handle = match comgr {
|
let handle = match comgr {
|
||||||
Comgr::V2(comgr) => {
|
Comgr::V2(comgr) => {
|
||||||
let mut result = unsafe { mem::zeroed() };
|
let mut result = unsafe { mem::zeroed() };
|
||||||
unsafe { comgr.$ctor(&mut result)? };
|
unsafe { comgr.$ctor(&mut result)? };
|
||||||
result.handle
|
result.handle
|
||||||
}
|
}
|
||||||
Comgr::V3(comgr) => {
|
Comgr::V3(comgr) => {
|
||||||
let mut result = unsafe { mem::zeroed() };
|
let mut result = unsafe { mem::zeroed() };
|
||||||
unsafe { comgr.$ctor(&mut result)? };
|
unsafe { comgr.$ctor(&mut result)? };
|
||||||
result.handle
|
result.handle
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Ok(Self { handle, comgr })
|
Ok(Self { handle, comgr })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn comgr2(&self) -> amd_comgr_sys::comgr2::$comgr_type {
|
fn comgr2(&self) -> amd_comgr_sys::comgr2::$comgr_type {
|
||||||
amd_comgr_sys::comgr2::$comgr_type {
|
amd_comgr_sys::comgr2::$comgr_type {
|
||||||
handle: self.handle,
|
handle: self.handle,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn comgr3(&self) -> amd_comgr_sys::comgr3::$comgr_type {
|
fn comgr3(&self) -> amd_comgr_sys::comgr3::$comgr_type {
|
||||||
amd_comgr_sys::comgr3::$comgr_type {
|
amd_comgr_sys::comgr3::$comgr_type {
|
||||||
handle: self.handle,
|
handle: self.handle,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Drop for $name<'a> {
|
impl<'a> Drop for $name<'a> {
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
match self.comgr {
|
match self.comgr {
|
||||||
Comgr::V2(comgr) => {
|
Comgr::V2(comgr) => {
|
||||||
unsafe {
|
unsafe {
|
||||||
comgr.$dtor(amd_comgr_sys::comgr2::$comgr_type {
|
comgr.$dtor(amd_comgr_sys::comgr2::$comgr_type {
|
||||||
handle: self.handle,
|
handle: self.handle,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
.ok();
|
.ok();
|
||||||
}
|
}
|
||||||
Comgr::V3(comgr) => {
|
Comgr::V3(comgr) => {
|
||||||
unsafe {
|
unsafe {
|
||||||
comgr.$dtor(amd_comgr_sys::comgr3::$comgr_type {
|
comgr.$dtor(amd_comgr_sys::comgr3::$comgr_type {
|
||||||
handle: self.handle,
|
handle: self.handle,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
.ok();
|
.ok();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
comgr_owned!(
|
comgr_owned!(
|
||||||
ActionInfo,
|
ActionInfo,
|
||||||
amd_comgr_action_info_t,
|
amd_comgr_action_info_t,
|
||||||
amd_comgr_create_action_info,
|
amd_comgr_create_action_info,
|
||||||
amd_comgr_destroy_action_info
|
amd_comgr_destroy_action_info
|
||||||
);
|
);
|
||||||
|
|
||||||
impl<'a> ActionInfo<'a> {
|
impl<'a> ActionInfo<'a> {
|
||||||
fn set_isa_name(&self, isa: &CStr) -> Result<(), Error> {
|
fn set_isa_name(&self, isa: &CStr) -> Result<(), Error> {
|
||||||
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
|
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
|
||||||
full_isa.extend(isa.to_bytes_with_nul());
|
full_isa.extend(isa.to_bytes_with_nul());
|
||||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_isa_name(self, { full_isa.as_ptr().cast() }));
|
call_dispatch!(self.comgr => amd_comgr_action_info_set_isa_name(self, { full_isa.as_ptr().cast() }));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_language(&self, language: Language) -> Result<(), Error> {
|
fn set_language(&self, language: Language) -> Result<(), Error> {
|
||||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_language(self, language));
|
call_dispatch!(self.comgr => amd_comgr_action_info_set_language(self, language));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_options<'b>(&self, options: impl Iterator<Item = &'b CStr>) -> Result<(), Error> {
|
fn set_options<'b>(&self, options: impl Iterator<Item = &'b CStr>) -> Result<(), Error> {
|
||||||
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
|
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
|
||||||
call_dispatch!(self.comgr => amd_comgr_action_info_set_option_list(self, { options.as_ptr().cast_mut() }, { options.len() }));
|
call_dispatch!(self.comgr => amd_comgr_action_info_set_option_list(self, { options.as_ptr().cast_mut() }, { options.len() }));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
comgr_owned!(
|
comgr_owned!(
|
||||||
DataSet,
|
DataSet,
|
||||||
amd_comgr_data_set_t,
|
amd_comgr_data_set_t,
|
||||||
amd_comgr_create_data_set,
|
amd_comgr_create_data_set,
|
||||||
amd_comgr_destroy_data_set
|
amd_comgr_destroy_data_set
|
||||||
);
|
);
|
||||||
|
|
||||||
impl<'a> DataSet<'a> {
|
impl<'a> DataSet<'a> {
|
||||||
fn add(&self, data: &Data) -> Result<(), Error> {
|
fn add(&self, data: &Data) -> Result<(), Error> {
|
||||||
call_dispatch!(self.comgr => amd_comgr_data_set_add(self, data));
|
call_dispatch!(self.comgr => amd_comgr_data_set_add(self, data));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_data(&self, kind: DataKind, index: usize) -> Result<Data, Error> {
|
fn get_data(&self, kind: DataKind, index: usize) -> Result<Data, Error> {
|
||||||
let mut handle = 0u64;
|
let mut handle = 0u64;
|
||||||
call_dispatch!(self.comgr => amd_comgr_action_data_get_data(self, kind, { index }, { std::ptr::from_mut(&mut handle).cast() }));
|
call_dispatch!(self.comgr => amd_comgr_action_data_get_data(self, kind, { index }, { std::ptr::from_mut(&mut handle).cast() }));
|
||||||
Ok(Data(handle))
|
Ok(Data(handle))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Data(u64);
|
struct Data(u64);
|
||||||
|
|
||||||
impl Data {
|
impl Data {
|
||||||
fn new(comgr: &Comgr, kind: DataKind, name: &CStr, content: &[u8]) -> Result<Self, Error> {
|
fn new(comgr: &Comgr, kind: DataKind, name: &CStr, content: &[u8]) -> Result<Self, Error> {
|
||||||
let mut handle = 0u64;
|
let mut handle = 0u64;
|
||||||
call_dispatch!(comgr => amd_comgr_create_data(kind, { std::ptr::from_mut(&mut handle).cast() }));
|
call_dispatch!(comgr => amd_comgr_create_data(kind, { std::ptr::from_mut(&mut handle).cast() }));
|
||||||
let data = Data(handle);
|
let data = Data(handle);
|
||||||
call_dispatch!(comgr => amd_comgr_set_data_name(data, { name.as_ptr() }));
|
call_dispatch!(comgr => amd_comgr_set_data_name(data, { name.as_ptr() }));
|
||||||
call_dispatch!(comgr => amd_comgr_set_data(data, { content.len() }, { content.as_ptr().cast() }));
|
call_dispatch!(comgr => amd_comgr_set_data(data, { content.len() }, { content.as_ptr().cast() }));
|
||||||
Ok(data)
|
Ok(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn comgr2(&self) -> comgr2::amd_comgr_data_t {
|
fn comgr2(&self) -> comgr2::amd_comgr_data_t {
|
||||||
comgr2::amd_comgr_data_s { handle: self.0 }
|
comgr2::amd_comgr_data_s { handle: self.0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn comgr3(&self) -> comgr3::amd_comgr_data_t {
|
fn comgr3(&self) -> comgr3::amd_comgr_data_t {
|
||||||
comgr3::amd_comgr_data_s { handle: self.0 }
|
comgr3::amd_comgr_data_s { handle: self.0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn copy_content(&self, comgr: &Comgr) -> Result<Vec<u8>, Error> {
|
fn copy_content(&self, comgr: &Comgr) -> Result<Vec<u8>, Error> {
|
||||||
let mut size = unsafe { mem::zeroed() };
|
let mut size = unsafe { mem::zeroed() };
|
||||||
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { ptr::null_mut() }));
|
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { ptr::null_mut() }));
|
||||||
let mut result: Vec<u8> = Vec::with_capacity(size);
|
let mut result: Vec<u8> = Vec::with_capacity(size);
|
||||||
unsafe { result.set_len(size) };
|
unsafe { result.set_len(size) };
|
||||||
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { result.as_mut_ptr().cast() }));
|
call_dispatch!(comgr => amd_comgr_get_data(self, { &mut size }, { result.as_mut_ptr().cast() }));
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compile_bitcode(
|
pub fn compile_bitcode(
|
||||||
comgr: &Comgr,
|
comgr: &Comgr,
|
||||||
gcn_arch: &CStr,
|
gcn_arch: &CStr,
|
||||||
main_buffer: &[u8],
|
main_buffer: &[u8],
|
||||||
attributes_buffer: &[u8],
|
attributes_buffer: &[u8],
|
||||||
ptx_impl: &[u8],
|
ptx_impl: &[u8],
|
||||||
) -> Result<Vec<u8>, Error> {
|
) -> Result<Vec<u8>, Error> {
|
||||||
let bitcode_data_set = DataSet::new(comgr)?;
|
let bitcode_data_set = DataSet::new(comgr)?;
|
||||||
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
|
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
|
||||||
bitcode_data_set.add(&main_bitcode_data)?;
|
bitcode_data_set.add(&main_bitcode_data)?;
|
||||||
let attributes_bitcode_data = Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
|
let attributes_bitcode_data =
|
||||||
bitcode_data_set.add(&attributes_bitcode_data)?;
|
Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
|
||||||
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
|
bitcode_data_set.add(&attributes_bitcode_data)?;
|
||||||
bitcode_data_set.add(&stdlib_bitcode_data)?;
|
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
|
||||||
let linking_info = ActionInfo::new(comgr)?;
|
bitcode_data_set.add(&stdlib_bitcode_data)?;
|
||||||
let linked_data_set =
|
let linking_info = ActionInfo::new(comgr)?;
|
||||||
comgr.do_action(ActionKind::LinkBcToBc, &linking_info, &bitcode_data_set)?;
|
let linked_data_set =
|
||||||
let compile_to_exec = ActionInfo::new(comgr)?;
|
comgr.do_action(ActionKind::LinkBcToBc, &linking_info, &bitcode_data_set)?;
|
||||||
compile_to_exec.set_isa_name(gcn_arch)?;
|
let compile_to_exec = ActionInfo::new(comgr)?;
|
||||||
compile_to_exec.set_language(Language::LlvmIr)?;
|
compile_to_exec.set_isa_name(gcn_arch)?;
|
||||||
let common_options = [
|
compile_to_exec.set_language(Language::LlvmIr)?;
|
||||||
// This makes no sense, but it makes ockl linking work
|
let common_options = [
|
||||||
c"-Xclang",
|
// This makes no sense, but it makes ockl linking work
|
||||||
c"-mno-link-builtin-bitcode-postopt",
|
c"-Xclang",
|
||||||
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
|
c"-mno-link-builtin-bitcode-postopt",
|
||||||
// and then fails to inline them
|
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
|
||||||
c"-Xclang",
|
// and then fails to inline them
|
||||||
c"-fdenormal-fp-math=dynamic",
|
c"-Xclang",
|
||||||
c"-O3",
|
c"-fdenormal-fp-math=dynamic",
|
||||||
c"-mno-wavefrontsize64",
|
c"-O3",
|
||||||
c"-mcumode",
|
c"-mno-wavefrontsize64",
|
||||||
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
|
c"-mcumode",
|
||||||
// c"-fsave-optimization-record=yaml",
|
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
|
||||||
]
|
// c"-fsave-optimization-record=yaml",
|
||||||
.into_iter();
|
]
|
||||||
let opt_options = if cfg!(debug_assertions) {
|
.into_iter();
|
||||||
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
|
let opt_options = if cfg!(debug_assertions) {
|
||||||
[c"-g", c"", c"", c"", c""]
|
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
|
||||||
} else {
|
[c"-g", c"", c"", c"", c""]
|
||||||
[
|
} else {
|
||||||
c"-g0",
|
[
|
||||||
// default inlining threshold times 10
|
c"-g0",
|
||||||
c"-mllvm",
|
// default inlining threshold times 10
|
||||||
c"-inline-threshold=2250",
|
c"-mllvm",
|
||||||
c"-mllvm",
|
c"-inline-threshold=2250",
|
||||||
c"-inlinehint-threshold=3250",
|
c"-mllvm",
|
||||||
]
|
c"-inlinehint-threshold=3250",
|
||||||
};
|
]
|
||||||
compile_to_exec.set_options(common_options.chain(opt_options))?;
|
};
|
||||||
let exec_data_set = comgr.do_action(
|
compile_to_exec.set_options(common_options.chain(opt_options))?;
|
||||||
ActionKind::CompileSourceToExecutable,
|
let exec_data_set = comgr.do_action(
|
||||||
&compile_to_exec,
|
ActionKind::CompileSourceToExecutable,
|
||||||
&linked_data_set,
|
&compile_to_exec,
|
||||||
)?;
|
&linked_data_set,
|
||||||
let executable = exec_data_set.get_data(DataKind::Executable, 0)?;
|
)?;
|
||||||
executable.copy_content(comgr)
|
let executable = exec_data_set.get_data(DataKind::Executable, 0)?;
|
||||||
}
|
executable.copy_content(comgr)
|
||||||
|
}
|
||||||
pub enum Comgr {
|
|
||||||
V2(amd_comgr_sys::comgr2::Comgr2),
|
pub enum Comgr {
|
||||||
V3(amd_comgr_sys::comgr3::Comgr3),
|
V2(amd_comgr_sys::comgr2::Comgr2),
|
||||||
}
|
V3(amd_comgr_sys::comgr3::Comgr3),
|
||||||
|
}
|
||||||
impl Comgr {
|
|
||||||
pub fn new() -> Result<Self, Error> {
|
impl Comgr {
|
||||||
unsafe { libloading::Library::new(os::COMGR3) }
|
pub fn new() -> Result<Self, Error> {
|
||||||
.and_then(|lib| {
|
unsafe { libloading::Library::new(os::COMGR3) }
|
||||||
Ok(Comgr::V3(unsafe {
|
.and_then(|lib| {
|
||||||
amd_comgr_sys::comgr3::Comgr3::from_library(lib)?
|
Ok(Comgr::V3(unsafe {
|
||||||
}))
|
amd_comgr_sys::comgr3::Comgr3::from_library(lib)?
|
||||||
})
|
}))
|
||||||
.or_else(|_| {
|
})
|
||||||
unsafe { libloading::Library::new(os::COMGR2) }.and_then(|lib| {
|
.or_else(|_| {
|
||||||
Ok(if Self::is_broken_v2(&lib) {
|
unsafe { libloading::Library::new(os::COMGR2) }.and_then(|lib| {
|
||||||
Comgr::V3(unsafe { amd_comgr_sys::comgr3::Comgr3::from_library(lib)? })
|
Ok(if Self::is_broken_v2(&lib) {
|
||||||
} else {
|
Comgr::V3(unsafe { amd_comgr_sys::comgr3::Comgr3::from_library(lib)? })
|
||||||
Comgr::V2(unsafe { amd_comgr_sys::comgr2::Comgr2::from_library(lib)? })
|
} else {
|
||||||
})
|
Comgr::V2(unsafe { amd_comgr_sys::comgr2::Comgr2::from_library(lib)? })
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.map_err(Into::into)
|
})
|
||||||
}
|
.map_err(Into::into)
|
||||||
|
}
|
||||||
// For reasons unknown, on AMD Adrenalin 25.5.1, AMD ships amd_comgr_2.dll that shows up as
|
|
||||||
// version 2.9.0, but actually uses the 3.X ABI. This is our best effort to detect it.
|
// For reasons unknown, on AMD Adrenalin 25.5.1, AMD ships amd_comgr_2.dll that shows up as
|
||||||
// Version 25.3.1 returns 2.8.0, which seem to be the last version that actually uses the 2 ABI
|
// version 2.9.0, but actually uses the 3.X ABI. This is our best effort to detect it.
|
||||||
fn is_broken_v2(lib: &libloading::Library) -> bool {
|
// Version 25.3.1 returns 2.8.0, which seem to be the last version that actually uses the 2 ABI
|
||||||
if cfg!(not(windows)) {
|
fn is_broken_v2(lib: &libloading::Library) -> bool {
|
||||||
return false;
|
if cfg!(not(windows)) {
|
||||||
}
|
return false;
|
||||||
let amd_comgr_get_version = match unsafe {
|
}
|
||||||
lib.get::<unsafe extern "C" fn(major: *mut usize, minor: *mut usize)>(
|
let amd_comgr_get_version = match unsafe {
|
||||||
b"amd_comgr_get_version\0",
|
lib.get::<unsafe extern "C" fn(major: *mut usize, minor: *mut usize)>(
|
||||||
)
|
b"amd_comgr_get_version\0",
|
||||||
} {
|
)
|
||||||
Ok(symbol) => symbol,
|
} {
|
||||||
Err(_) => return false,
|
Ok(symbol) => symbol,
|
||||||
};
|
Err(_) => return false,
|
||||||
let mut major = 0;
|
};
|
||||||
let mut minor = 0;
|
let mut major = 0;
|
||||||
unsafe { (amd_comgr_get_version)(&mut major, &mut minor) };
|
let mut minor = 0;
|
||||||
(major, minor) >= (2, 9)
|
unsafe { (amd_comgr_get_version)(&mut major, &mut minor) };
|
||||||
}
|
(major, minor) >= (2, 9)
|
||||||
|
}
|
||||||
fn do_action(
|
|
||||||
&self,
|
fn do_action(
|
||||||
kind: ActionKind,
|
&self,
|
||||||
action: &ActionInfo,
|
kind: ActionKind,
|
||||||
data_set: &DataSet,
|
action: &ActionInfo,
|
||||||
) -> Result<DataSet, Error> {
|
data_set: &DataSet,
|
||||||
let result = DataSet::new(self)?;
|
) -> Result<DataSet, Error> {
|
||||||
call_dispatch!(self => amd_comgr_do_action(kind, action, data_set, result));
|
let result = DataSet::new(self)?;
|
||||||
Ok(result)
|
call_dispatch!(self => amd_comgr_do_action(kind, action, data_set, result));
|
||||||
}
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Error(pub ::std::num::NonZeroU32);
|
#[derive(Debug)]
|
||||||
|
pub struct Error(pub ::std::num::NonZeroU32);
|
||||||
impl Error {
|
|
||||||
#[doc = " A generic error has occurred."]
|
impl Error {
|
||||||
pub const UNKNOWN: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
|
#[doc = " A generic error has occurred."]
|
||||||
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
|
pub const UNKNOWN: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
|
||||||
pub const INVALID_ARGUMENT: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
|
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
|
||||||
#[doc = " Failed to allocate the necessary resources."]
|
pub const INVALID_ARGUMENT: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
|
||||||
pub const OUT_OF_RESOURCES: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
|
#[doc = " Failed to allocate the necessary resources."]
|
||||||
}
|
pub const OUT_OF_RESOURCES: Error = Error(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
|
||||||
|
}
|
||||||
impl From<libloading::Error> for Error {
|
|
||||||
fn from(_: libloading::Error) -> Self {
|
impl From<libloading::Error> for Error {
|
||||||
Self::UNKNOWN
|
fn from(_: libloading::Error) -> Self {
|
||||||
}
|
Self::UNKNOWN
|
||||||
}
|
}
|
||||||
|
}
|
||||||
impl From<comgr2::amd_comgr_status_s> for Error {
|
|
||||||
fn from(status: comgr2::amd_comgr_status_s) -> Self {
|
impl From<comgr2::amd_comgr_status_s> for Error {
|
||||||
Error(status.0)
|
fn from(status: comgr2::amd_comgr_status_s) -> Self {
|
||||||
}
|
Error(status.0)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
impl From<comgr3::amd_comgr_status_s> for Error {
|
|
||||||
fn from(status: comgr3::amd_comgr_status_s) -> Self {
|
impl From<comgr3::amd_comgr_status_s> for Error {
|
||||||
Error(status.0)
|
fn from(status: comgr3::amd_comgr_status_s) -> Self {
|
||||||
}
|
Error(status.0)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
macro_rules! impl_into {
|
|
||||||
($self_type:ident, $to_type:ident, [$($from:ident => $to:ident),+]) => {
|
macro_rules! impl_into {
|
||||||
#[derive(Copy, Clone)]
|
($self_type:ident, $to_type:ident, [$($from:ident => $to:ident),+]) => {
|
||||||
#[allow(unused)]
|
#[derive(Copy, Clone)]
|
||||||
enum $self_type {
|
#[allow(unused)]
|
||||||
$(
|
enum $self_type {
|
||||||
$from,
|
$(
|
||||||
)+
|
$from,
|
||||||
}
|
)+
|
||||||
|
}
|
||||||
impl $self_type {
|
|
||||||
fn comgr2(self) -> comgr2::$to_type {
|
impl $self_type {
|
||||||
match self {
|
fn comgr2(self) -> comgr2::$to_type {
|
||||||
$(
|
match self {
|
||||||
Self:: $from => comgr2 :: $to_type :: $to,
|
$(
|
||||||
)+
|
Self:: $from => comgr2 :: $to_type :: $to,
|
||||||
}
|
)+
|
||||||
}
|
}
|
||||||
|
}
|
||||||
fn comgr3(self) -> comgr3::$to_type {
|
|
||||||
match self {
|
fn comgr3(self) -> comgr3::$to_type {
|
||||||
$(
|
match self {
|
||||||
Self:: $from => comgr3 :: $to_type :: $to,
|
$(
|
||||||
)+
|
Self:: $from => comgr3 :: $to_type :: $to,
|
||||||
}
|
)+
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
}
|
};
|
||||||
|
}
|
||||||
impl_into!(
|
|
||||||
ActionKind,
|
impl_into!(
|
||||||
amd_comgr_action_kind_t,
|
ActionKind,
|
||||||
[
|
amd_comgr_action_kind_t,
|
||||||
LinkBcToBc => AMD_COMGR_ACTION_LINK_BC_TO_BC,
|
[
|
||||||
CompileSourceToExecutable => AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE
|
LinkBcToBc => AMD_COMGR_ACTION_LINK_BC_TO_BC,
|
||||||
]
|
CompileSourceToExecutable => AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE
|
||||||
);
|
]
|
||||||
|
);
|
||||||
impl_into!(
|
|
||||||
DataKind,
|
impl_into!(
|
||||||
amd_comgr_data_kind_t,
|
DataKind,
|
||||||
[
|
amd_comgr_data_kind_t,
|
||||||
Undef => AMD_COMGR_DATA_KIND_UNDEF,
|
[
|
||||||
Source => AMD_COMGR_DATA_KIND_SOURCE,
|
Undef => AMD_COMGR_DATA_KIND_UNDEF,
|
||||||
Include => AMD_COMGR_DATA_KIND_INCLUDE,
|
Source => AMD_COMGR_DATA_KIND_SOURCE,
|
||||||
PrecompiledHeader => AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER,
|
Include => AMD_COMGR_DATA_KIND_INCLUDE,
|
||||||
Diagnostic => AMD_COMGR_DATA_KIND_DIAGNOSTIC,
|
PrecompiledHeader => AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER,
|
||||||
Log => AMD_COMGR_DATA_KIND_LOG,
|
Diagnostic => AMD_COMGR_DATA_KIND_DIAGNOSTIC,
|
||||||
Bc => AMD_COMGR_DATA_KIND_BC,
|
Log => AMD_COMGR_DATA_KIND_LOG,
|
||||||
Relocatable => AMD_COMGR_DATA_KIND_RELOCATABLE,
|
Bc => AMD_COMGR_DATA_KIND_BC,
|
||||||
Executable => AMD_COMGR_DATA_KIND_EXECUTABLE,
|
Relocatable => AMD_COMGR_DATA_KIND_RELOCATABLE,
|
||||||
Bytes => AMD_COMGR_DATA_KIND_BYTES,
|
Executable => AMD_COMGR_DATA_KIND_EXECUTABLE,
|
||||||
Fatbin => AMD_COMGR_DATA_KIND_FATBIN,
|
Bytes => AMD_COMGR_DATA_KIND_BYTES,
|
||||||
Ar => AMD_COMGR_DATA_KIND_AR,
|
Fatbin => AMD_COMGR_DATA_KIND_FATBIN,
|
||||||
BcBundle => AMD_COMGR_DATA_KIND_BC_BUNDLE,
|
Ar => AMD_COMGR_DATA_KIND_AR,
|
||||||
ArBundle => AMD_COMGR_DATA_KIND_AR_BUNDLE,
|
BcBundle => AMD_COMGR_DATA_KIND_BC_BUNDLE,
|
||||||
ObjBundle => AMD_COMGR_DATA_KIND_OBJ_BUNDLE
|
ArBundle => AMD_COMGR_DATA_KIND_AR_BUNDLE,
|
||||||
|
ObjBundle => AMD_COMGR_DATA_KIND_OBJ_BUNDLE
|
||||||
]
|
|
||||||
);
|
]
|
||||||
|
);
|
||||||
impl_into!(
|
|
||||||
Language,
|
impl_into!(
|
||||||
amd_comgr_language_t,
|
Language,
|
||||||
[
|
amd_comgr_language_t,
|
||||||
None => AMD_COMGR_LANGUAGE_NONE,
|
[
|
||||||
OpenCl12 => AMD_COMGR_LANGUAGE_OPENCL_1_2,
|
None => AMD_COMGR_LANGUAGE_NONE,
|
||||||
OpenCl20 => AMD_COMGR_LANGUAGE_OPENCL_2_0,
|
OpenCl12 => AMD_COMGR_LANGUAGE_OPENCL_1_2,
|
||||||
Hip => AMD_COMGR_LANGUAGE_HIP,
|
OpenCl20 => AMD_COMGR_LANGUAGE_OPENCL_2_0,
|
||||||
LlvmIr => AMD_COMGR_LANGUAGE_LLVM_IR
|
Hip => AMD_COMGR_LANGUAGE_HIP,
|
||||||
]
|
LlvmIr => AMD_COMGR_LANGUAGE_LLVM_IR
|
||||||
);
|
]
|
||||||
|
);
|
||||||
#[cfg(unix)]
|
|
||||||
mod os {
|
#[cfg(unix)]
|
||||||
pub static COMGR3: &'static str = "libamd_comgr.so.3";
|
mod os {
|
||||||
pub static COMGR2: &'static str = "libamd_comgr.so.2";
|
pub static COMGR3: &'static str = "libamd_comgr.so.3";
|
||||||
}
|
pub static COMGR2: &'static str = "libamd_comgr.so.2";
|
||||||
|
}
|
||||||
#[cfg(windows)]
|
|
||||||
mod os {
|
#[cfg(windows)]
|
||||||
pub static COMGR3: &'static str = "amd_comgr_3.dll";
|
mod os {
|
||||||
pub static COMGR2: &'static str = "amd_comgr_2.dll";
|
pub static COMGR3: &'static str = "amd_comgr_3.dll";
|
||||||
}
|
pub static COMGR2: &'static str = "amd_comgr_2.dll";
|
||||||
|
}
|
||||||
|
1
cuda_macros/.rustfmt.toml
Normal file
1
cuda_macros/.rustfmt.toml
Normal file
@ -0,0 +1 @@
|
|||||||
|
disable_all_formatting = true
|
1
cuda_types/.rustfmt.toml
Normal file
1
cuda_types/.rustfmt.toml
Normal file
@ -0,0 +1 @@
|
|||||||
|
disable_all_formatting = true
|
@ -77,21 +77,22 @@ impl<'a> Fatbin<'a> {
|
|||||||
|
|
||||||
pub fn get_submodules(&self) -> Result<FatbinIter<'a>, FatbinError> {
|
pub fn get_submodules(&self) -> Result<FatbinIter<'a>, FatbinError> {
|
||||||
match self.wrapper.version {
|
match self.wrapper.version {
|
||||||
FatbincWrapper::VERSION_V2 =>
|
FatbincWrapper::VERSION_V2 => Ok(FatbinIter::V2(FatbinSubmoduleIterator {
|
||||||
Ok(FatbinIter::V2(FatbinSubmoduleIterator {
|
fatbins: self.wrapper.filename_or_fatbins as *const *const std::ffi::c_void,
|
||||||
fatbins: self.wrapper.filename_or_fatbins as *const *const std::ffi::c_void,
|
_phantom: std::marker::PhantomData,
|
||||||
_phantom: std::marker::PhantomData,
|
})),
|
||||||
})),
|
|
||||||
FatbincWrapper::VERSION_V1 => {
|
FatbincWrapper::VERSION_V1 => {
|
||||||
let header = parse_fatbin_header(&self.wrapper.data)
|
let header =
|
||||||
.map_err(FatbinError::ParseFailure)?;
|
parse_fatbin_header(&self.wrapper.data).map_err(FatbinError::ParseFailure)?;
|
||||||
Ok(FatbinIter::V1(Some(FatbinSubmodule::new(header))))
|
Ok(FatbinIter::V1(Some(FatbinSubmodule::new(header))))
|
||||||
}
|
}
|
||||||
version => Err(FatbinError::ParseFailure(ParseError::UnexpectedBinaryField{
|
version => Err(FatbinError::ParseFailure(
|
||||||
field_name: "FATBINC_VERSION",
|
ParseError::UnexpectedBinaryField {
|
||||||
observed: version,
|
field_name: "FATBINC_VERSION",
|
||||||
expected: [FatbincWrapper::VERSION_V1, FatbincWrapper::VERSION_V2].into(),
|
observed: version,
|
||||||
})),
|
expected: [FatbincWrapper::VERSION_V1, FatbincWrapper::VERSION_V2].into(),
|
||||||
|
},
|
||||||
|
)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -176,7 +177,6 @@ impl<'a> FatbinFile<'a> {
|
|||||||
unsafe { self.get_payload().to_vec() }
|
unsafe { self.get_payload().to_vec() }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
while payload.last() == Some(&0) {
|
while payload.last() == Some(&0) {
|
||||||
// remove trailing zeros
|
// remove trailing zeros
|
||||||
payload.pop();
|
payload.pop();
|
||||||
|
@ -259,12 +259,12 @@ dark_api! {
|
|||||||
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
|
"{C693336E-1121-DF11-A8C3-68F355D89593}" => CONTEXT_LOCAL_STORAGE_INTERFACE_V0301[4] {
|
||||||
[0] = context_local_storage_put(
|
[0] = context_local_storage_put(
|
||||||
context: cuda_types::cuda::CUcontext,
|
context: cuda_types::cuda::CUcontext,
|
||||||
key: *mut std::ffi::c_void,
|
key: *mut std::ffi::c_void,
|
||||||
value: *mut std::ffi::c_void,
|
value: *mut std::ffi::c_void,
|
||||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||||
dtor_cb: Option<extern "system" fn(
|
dtor_cb: Option<extern "system" fn(
|
||||||
cuda_types::cuda::CUcontext,
|
cuda_types::cuda::CUcontext,
|
||||||
*mut std::ffi::c_void,
|
*mut std::ffi::c_void,
|
||||||
*mut std::ffi::c_void,
|
*mut std::ffi::c_void,
|
||||||
)>
|
)>
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
|
2
ext/amd_comgr-sys/src/lib.rs
vendored
2
ext/amd_comgr-sys/src/lib.rs
vendored
@ -1,4 +1,4 @@
|
|||||||
#[allow(warnings)]
|
#[allow(warnings)]
|
||||||
pub mod comgr2;
|
pub mod comgr2;
|
||||||
#[allow(warnings)]
|
#[allow(warnings)]
|
||||||
pub mod comgr3;
|
pub mod comgr3;
|
||||||
|
1
ext/hip_runtime-sys/.rustfmt.toml
vendored
Normal file
1
ext/hip_runtime-sys/.rustfmt.toml
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
disable_all_formatting = true
|
1
ext/rocblas-sys/.rustfmt.toml
vendored
Normal file
1
ext/rocblas-sys/.rustfmt.toml
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
disable_all_formatting = true
|
1
format/.rustfmt.toml
Normal file
1
format/.rustfmt.toml
Normal file
@ -0,0 +1 @@
|
|||||||
|
disable_all_formatting = true
|
@ -37,4 +37,4 @@ impl CudaDisplay for FatbinHeader {
|
|||||||
CudaDisplay::write(&self.files_size, "", 0, writer)?;
|
CudaDisplay::write(&self.files_size, "", 0, writer)?;
|
||||||
writer.write_all(b" }")
|
writer.write_all(b" }")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
2635
format/src/lib.rs
2635
format/src/lib.rs
File diff suppressed because it is too large
Load Diff
@ -4,4 +4,3 @@ mod test;
|
|||||||
|
|
||||||
pub use pass::to_llvm_module;
|
pub use pass::to_llvm_module;
|
||||||
pub use pass::Attributes;
|
pub use pass::Attributes;
|
||||||
|
|
||||||
|
@ -1,10 +1,13 @@
|
|||||||
use std::ffi::CStr;
|
use std::ffi::CStr;
|
||||||
|
|
||||||
use super::*;
|
|
||||||
use super::super::*;
|
use super::super::*;
|
||||||
use llvm_zluda::{core::*};
|
use super::*;
|
||||||
|
use llvm_zluda::core::*;
|
||||||
|
|
||||||
pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Module, TranslateError> {
|
pub(crate) fn run(
|
||||||
|
context: &Context,
|
||||||
|
attributes: Attributes,
|
||||||
|
) -> Result<llvm::Module, TranslateError> {
|
||||||
let module = llvm::Module::new(context, LLVM_UNNAMED);
|
let module = llvm::Module::new(context, LLVM_UNNAMED);
|
||||||
|
|
||||||
emit_attribute(context, &module, "clock_rate", attributes.clock_rate)?;
|
emit_attribute(context, &module, "clock_rate", attributes.clock_rate)?;
|
||||||
@ -16,7 +19,12 @@ pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Mod
|
|||||||
Ok(module)
|
Ok(module)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribute: u32) -> Result<(), TranslateError> {
|
fn emit_attribute(
|
||||||
|
context: &Context,
|
||||||
|
module: &llvm::Module,
|
||||||
|
name: &str,
|
||||||
|
attribute: u32,
|
||||||
|
) -> Result<(), TranslateError> {
|
||||||
let name = format!("{}attribute_{}\0", ZLUDA_PTX_PREFIX, name).to_ascii_uppercase();
|
let name = format!("{}attribute_{}\0", ZLUDA_PTX_PREFIX, name).to_ascii_uppercase();
|
||||||
let name = unsafe { CStr::from_bytes_with_nul_unchecked(name.as_bytes()) };
|
let name = unsafe { CStr::from_bytes_with_nul_unchecked(name.as_bytes()) };
|
||||||
let attribute_type = get_scalar_type(context.get(), ast::ScalarType::U32);
|
let attribute_type = get_scalar_type(context.get(), ast::ScalarType::U32);
|
||||||
@ -31,4 +39,4 @@ fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribut
|
|||||||
unsafe { LLVMSetInitializer(global, LLVMConstInt(attribute_type, attribute as u64, 0)) };
|
unsafe { LLVMSetInitializer(global, LLVMConstInt(attribute_type, attribute as u64, 0)) };
|
||||||
unsafe { LLVMSetGlobalConstant(global, 1) };
|
unsafe { LLVMSetGlobalConstant(global, 1) };
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
pub(super) mod emit;
|
|
||||||
pub(super) mod attributes;
|
pub(super) mod attributes;
|
||||||
|
pub(super) mod emit;
|
||||||
|
|
||||||
use std::ffi::CStr;
|
use std::ffi::CStr;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
@ -44,9 +44,7 @@ pub struct Module(LLVMModuleRef);
|
|||||||
|
|
||||||
impl Module {
|
impl Module {
|
||||||
fn new(ctx: &Context, name: &CStr) -> Self {
|
fn new(ctx: &Context, name: &CStr) -> Self {
|
||||||
Self(
|
Self(unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) })
|
||||||
unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) },
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get(&self) -> LLVMModuleRef {
|
fn get(&self) -> LLVMModuleRef {
|
||||||
|
1929
ptx/src/pass/mod.rs
1929
ptx/src/pass/mod.rs
File diff suppressed because it is too large
Load Diff
@ -21,7 +21,9 @@ pub(crate) fn run(
|
|||||||
for directive in directives.iter_mut() {
|
for directive in directives.iter_mut() {
|
||||||
let (body_ref, is_kernel) = match directive {
|
let (body_ref, is_kernel) = match directive {
|
||||||
Directive2::Method(Function2 {
|
Directive2::Method(Function2 {
|
||||||
body: Some(body), is_kernel, ..
|
body: Some(body),
|
||||||
|
is_kernel,
|
||||||
|
..
|
||||||
}) => (body, *is_kernel),
|
}) => (body, *is_kernel),
|
||||||
_ => continue,
|
_ => continue,
|
||||||
};
|
};
|
||||||
|
@ -9,7 +9,9 @@ fn parse_and_assert(ptx_text: &str) {
|
|||||||
|
|
||||||
fn compile_and_assert(ptx_text: &str) -> Result<(), TranslateError> {
|
fn compile_and_assert(ptx_text: &str) -> Result<(), TranslateError> {
|
||||||
let ast = ast::parse_module_checked(ptx_text).unwrap();
|
let ast = ast::parse_module_checked(ptx_text).unwrap();
|
||||||
let attributes = pass::Attributes { clock_rate: 2124000 };
|
let attributes = pass::Attributes {
|
||||||
|
clock_rate: 2124000,
|
||||||
|
};
|
||||||
crate::to_llvm_module(ast, attributes)?;
|
crate::to_llvm_module(ast, attributes)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -3581,7 +3581,7 @@ derive_parser!(
|
|||||||
state.errors.push(PtxError::SyntaxError);
|
state.errors.push(PtxError::SyntaxError);
|
||||||
CpAsyncCpSize::Bytes4
|
CpAsyncCpSize::Bytes4
|
||||||
});
|
});
|
||||||
|
|
||||||
let src_size = src_size
|
let src_size = src_size
|
||||||
.and_then(|op| op.as_immediate())
|
.and_then(|op| op.as_immediate())
|
||||||
.and_then(|imm| imm.as_u64());
|
.and_then(|imm| imm.as_u64());
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
|||||||
|
use super::{context, driver};
|
||||||
use cuda_types::cuda::*;
|
use cuda_types::cuda::*;
|
||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
use std::{mem, ptr};
|
use std::{mem, ptr};
|
||||||
use super::{driver, context};
|
|
||||||
|
|
||||||
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
|
||||||
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
|
||||||
@ -462,22 +462,21 @@ fn clamp_usize(x: usize) -> i32 {
|
|||||||
usize::min(x, i32::MAX as usize) as i32
|
usize::min(x, i32::MAX as usize) as i32
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_primary_context(hip_dev: hipDevice_t) -> Result<(&'static context::Context, CUcontext), CUerror> {
|
pub(crate) fn get_primary_context(
|
||||||
|
hip_dev: hipDevice_t,
|
||||||
|
) -> Result<(&'static context::Context, CUcontext), CUerror> {
|
||||||
let dev: &'static driver::Device = driver::device(hip_dev)?;
|
let dev: &'static driver::Device = driver::device(hip_dev)?;
|
||||||
Ok(dev.primary_context())
|
Ok(dev.primary_context())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn primary_context_retain(
|
pub(crate) fn primary_context_retain(pctx: &mut CUcontext, hip_dev: hipDevice_t) -> CUresult {
|
||||||
pctx: &mut CUcontext,
|
|
||||||
hip_dev: hipDevice_t,
|
|
||||||
) -> CUresult {
|
|
||||||
let (ctx, cu_ctx) = get_primary_context(hip_dev)?;
|
let (ctx, cu_ctx) = get_primary_context(hip_dev)?;
|
||||||
|
|
||||||
ctx.with_state_mut(|state: &mut context::ContextState| {
|
ctx.with_state_mut(|state: &mut context::ContextState| {
|
||||||
state.ref_count += 1;
|
state.ref_count += 1;
|
||||||
Ok(())
|
Ok(())
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
*pctx = cu_ctx;
|
*pctx = cu_ctx;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -497,8 +496,6 @@ pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> CUresult {
|
|||||||
|
|
||||||
pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
|
pub(crate) fn primary_context_reset(hip_dev: hipDevice_t) -> CUresult {
|
||||||
let (ctx, _) = get_primary_context(hip_dev)?;
|
let (ctx, _) = get_primary_context(hip_dev)?;
|
||||||
ctx.with_state_mut(|state| {
|
ctx.with_state_mut(|state| state.reset())?;
|
||||||
state.reset()
|
|
||||||
})?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -38,10 +38,7 @@ pub(crate) unsafe fn unload(library: CUlibrary) -> CUresult {
|
|||||||
super::drop_checked::<Library>(library)
|
super::drop_checked::<Library>(library)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn get_module(
|
pub(crate) unsafe fn get_module(out: &mut CUmodule, library: &Library) -> CUresult {
|
||||||
out: &mut CUmodule,
|
*out = module::Module { base: library.base }.wrap();
|
||||||
library: &Library,
|
|
||||||
) -> CUresult {
|
|
||||||
*out = module::Module{base: library.base}.wrap();
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -68,7 +68,9 @@ pub(crate) fn load_hip_module(image: *const std::ffi::c_void) -> Result<hipModul
|
|||||||
unsafe { hipCtxGetDevice(&mut dev) }?;
|
unsafe { hipCtxGetDevice(&mut dev) }?;
|
||||||
let mut props = unsafe { mem::zeroed() };
|
let mut props = unsafe { mem::zeroed() };
|
||||||
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
|
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
|
||||||
let attributes = ptx::Attributes { clock_rate: props.clockRate as u32 };
|
let attributes = ptx::Attributes {
|
||||||
|
clock_rate: props.clockRate as u32,
|
||||||
|
};
|
||||||
let llvm_module = ptx::to_llvm_module(ast, attributes).map_err(|_| CUerror::UNKNOWN)?;
|
let llvm_module = ptx::to_llvm_module(ast, attributes).map_err(|_| CUerror::UNKNOWN)?;
|
||||||
let elf_module = comgr::compile_bitcode(
|
let elf_module = comgr::compile_bitcode(
|
||||||
&global_state.comgr,
|
&global_state.comgr,
|
||||||
@ -91,7 +93,6 @@ pub(crate) fn load_data(module: &mut CUmodule, image: &std::ffi::c_void) -> CUre
|
|||||||
|
|
||||||
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
|
||||||
super::drop_checked::<Module>(hmod)
|
super::drop_checked::<Module>(hmod)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_function(
|
pub(crate) fn get_function(
|
||||||
|
116
zluda/src/lib.rs
116
zluda/src/lib.rs
@ -1,11 +1,10 @@
|
|||||||
use cuda_types::cuda::CUerror;
|
use cuda_types::cuda::CUerror;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
|
pub(crate) mod r#impl;
|
||||||
#[cfg_attr(windows, path = "os_win.rs")]
|
#[cfg_attr(windows, path = "os_win.rs")]
|
||||||
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
#[cfg_attr(not(windows), path = "os_unix.rs")]
|
||||||
mod os;
|
mod os;
|
||||||
pub(crate) mod r#impl;
|
|
||||||
|
|
||||||
static INITIALIZED: AtomicBool = AtomicBool::new(true);
|
static INITIALIZED: AtomicBool = AtomicBool::new(true);
|
||||||
pub(crate) fn initialized() -> bool {
|
pub(crate) fn initialized() -> bool {
|
||||||
@ -66,61 +65,60 @@ macro_rules! implemented_in_function {
|
|||||||
|
|
||||||
cuda_macros::cuda_function_declarations!(
|
cuda_macros::cuda_function_declarations!(
|
||||||
unimplemented,
|
unimplemented,
|
||||||
implemented <= [
|
implemented
|
||||||
cuCtxCreate_v2,
|
<= [
|
||||||
cuCtxDestroy_v2,
|
cuCtxCreate_v2,
|
||||||
cuCtxGetLimit,
|
cuCtxDestroy_v2,
|
||||||
cuCtxSetCurrent,
|
cuCtxGetLimit,
|
||||||
cuCtxGetCurrent,
|
cuCtxSetCurrent,
|
||||||
cuCtxGetDevice,
|
cuCtxGetCurrent,
|
||||||
cuCtxSetLimit,
|
cuCtxGetDevice,
|
||||||
cuCtxSynchronize,
|
cuCtxSetLimit,
|
||||||
cuCtxPushCurrent,
|
cuCtxSynchronize,
|
||||||
cuCtxPushCurrent_v2,
|
cuCtxPushCurrent,
|
||||||
cuCtxPopCurrent,
|
cuCtxPushCurrent_v2,
|
||||||
cuCtxPopCurrent_v2,
|
cuCtxPopCurrent,
|
||||||
cuDeviceComputeCapability,
|
cuCtxPopCurrent_v2,
|
||||||
cuDeviceGet,
|
cuDeviceComputeCapability,
|
||||||
cuDeviceGetAttribute,
|
cuDeviceGet,
|
||||||
cuDeviceGetCount,
|
cuDeviceGetAttribute,
|
||||||
cuDeviceGetLuid,
|
cuDeviceGetCount,
|
||||||
cuDeviceGetName,
|
cuDeviceGetLuid,
|
||||||
cuDeviceGetProperties,
|
cuDeviceGetName,
|
||||||
cuDeviceGetUuid,
|
cuDeviceGetProperties,
|
||||||
cuDeviceGetUuid_v2,
|
cuDeviceGetUuid,
|
||||||
cuDevicePrimaryCtxRelease,
|
cuDeviceGetUuid_v2,
|
||||||
cuDevicePrimaryCtxRetain,
|
cuDevicePrimaryCtxRelease,
|
||||||
cuDevicePrimaryCtxReset,
|
cuDevicePrimaryCtxRetain,
|
||||||
cuDeviceTotalMem_v2,
|
cuDevicePrimaryCtxReset,
|
||||||
cuDriverGetVersion,
|
cuDeviceTotalMem_v2,
|
||||||
cuFuncGetAttribute,
|
cuDriverGetVersion,
|
||||||
cuGetExportTable,
|
cuFuncGetAttribute,
|
||||||
cuGetProcAddress,
|
cuGetExportTable,
|
||||||
cuGetProcAddress_v2,
|
cuGetProcAddress,
|
||||||
cuInit,
|
cuGetProcAddress_v2,
|
||||||
cuLibraryLoadData,
|
cuInit,
|
||||||
cuLibraryGetModule,
|
cuLibraryLoadData,
|
||||||
cuLibraryUnload,
|
cuLibraryGetModule,
|
||||||
cuMemAlloc_v2,
|
cuLibraryUnload,
|
||||||
cuMemFree_v2,
|
cuMemAlloc_v2,
|
||||||
cuMemHostAlloc,
|
cuMemFree_v2,
|
||||||
cuMemFreeHost,
|
cuMemHostAlloc,
|
||||||
cuMemGetAddressRange_v2,
|
cuMemFreeHost,
|
||||||
cuMemGetInfo_v2,
|
cuMemGetAddressRange_v2,
|
||||||
cuMemcpyDtoH_v2,
|
cuMemGetInfo_v2,
|
||||||
cuMemcpyHtoD_v2,
|
cuMemcpyDtoH_v2,
|
||||||
cuMemsetD32_v2,
|
cuMemcpyHtoD_v2,
|
||||||
cuMemsetD8_v2,
|
cuMemsetD32_v2,
|
||||||
cuModuleGetFunction,
|
cuMemsetD8_v2,
|
||||||
cuModuleGetLoadingMode,
|
cuModuleGetFunction,
|
||||||
cuModuleLoadData,
|
cuModuleGetLoadingMode,
|
||||||
cuModuleUnload,
|
cuModuleLoadData,
|
||||||
cuPointerGetAttribute,
|
cuModuleUnload,
|
||||||
cuStreamSynchronize,
|
cuPointerGetAttribute,
|
||||||
cuProfilerStart,
|
cuStreamSynchronize,
|
||||||
cuProfilerStop,
|
cuProfilerStart,
|
||||||
],
|
cuProfilerStop,
|
||||||
implemented_in_function <= [
|
],
|
||||||
cuLaunchKernel,
|
implemented_in_function <= [cuLaunchKernel,]
|
||||||
]
|
|
||||||
);
|
);
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
|
@ -10,15 +10,11 @@ pub(crate) fn unimplemented() -> cublasStatus_t {
|
|||||||
cublasStatus_t::ERROR_NOT_SUPPORTED
|
cublasStatus_t::ERROR_NOT_SUPPORTED
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_status_name(
|
pub(crate) fn get_status_name(_status: cublasStatus_t) -> *const ::core::ffi::c_char {
|
||||||
_status: cublasStatus_t,
|
|
||||||
) -> *const ::core::ffi::c_char {
|
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_status_string(
|
pub(crate) fn get_status_string(_status: cublasStatus_t) -> *const ::core::ffi::c_char {
|
||||||
_status: cublasStatus_t,
|
|
||||||
) -> *const ::core::ffi::c_char {
|
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,10 +28,11 @@ macro_rules! implemented {
|
|||||||
|
|
||||||
cuda_macros::cublas_function_declarations!(
|
cuda_macros::cublas_function_declarations!(
|
||||||
unimplemented,
|
unimplemented,
|
||||||
implemented <= [
|
implemented
|
||||||
cublasGetStatusName,
|
<= [
|
||||||
cublasGetStatusString,
|
cublasGetStatusName,
|
||||||
cublasXerbla,
|
cublasGetStatusString,
|
||||||
cublasGetCudartVersion,
|
cublasXerbla,
|
||||||
]
|
cublasGetCudartVersion
|
||||||
|
]
|
||||||
);
|
);
|
||||||
|
@ -31,8 +31,6 @@ pub(crate) fn get_cudart_version() -> usize {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_snake_case)]
|
#[allow(non_snake_case)]
|
||||||
pub(crate) fn disable_cpu_instructions_set_mask(
|
pub(crate) fn disable_cpu_instructions_set_mask(_mask: ::core::ffi::c_uint) -> ::core::ffi::c_uint {
|
||||||
_mask: ::core::ffi::c_uint,
|
|
||||||
) -> ::core::ffi::c_uint {
|
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
@ -28,11 +28,12 @@ macro_rules! implemented {
|
|||||||
|
|
||||||
cuda_macros::cublaslt_function_declarations!(
|
cuda_macros::cublaslt_function_declarations!(
|
||||||
unimplemented,
|
unimplemented,
|
||||||
implemented <= [
|
implemented
|
||||||
cublasLtGetStatusName,
|
<= [
|
||||||
cublasLtGetStatusString,
|
cublasLtGetStatusName,
|
||||||
cublasLtDisableCpuInstructionsSetMask,
|
cublasLtGetStatusString,
|
||||||
cublasLtGetVersion,
|
cublasLtDisableCpuInstructionsSetMask,
|
||||||
cublasLtGetCudartVersion
|
cublasLtGetVersion,
|
||||||
]
|
cublasLtGetCudartVersion
|
||||||
|
]
|
||||||
);
|
);
|
||||||
|
@ -28,11 +28,12 @@ macro_rules! implemented {
|
|||||||
|
|
||||||
cuda_macros::cudnn9_function_declarations!(
|
cuda_macros::cudnn9_function_declarations!(
|
||||||
unimplemented,
|
unimplemented,
|
||||||
implemented <= [
|
implemented
|
||||||
cudnnGetVersion,
|
<= [
|
||||||
cudnnGetMaxDeviceVersion,
|
cudnnGetVersion,
|
||||||
cudnnGetCudartVersion,
|
cudnnGetMaxDeviceVersion,
|
||||||
cudnnGetErrorString,
|
cudnnGetCudartVersion,
|
||||||
cudnnGetLastErrorString
|
cudnnGetErrorString,
|
||||||
]
|
cudnnGetLastErrorString
|
||||||
|
]
|
||||||
);
|
);
|
||||||
|
@ -420,13 +420,13 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
|
|||||||
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301 {
|
CONTEXT_LOCAL_STORAGE_INTERFACE_V0301 {
|
||||||
[0] = context_local_storage_put(
|
[0] = context_local_storage_put(
|
||||||
context: cuda_types::cuda::CUcontext,
|
context: cuda_types::cuda::CUcontext,
|
||||||
key: *mut std::ffi::c_void,
|
key: *mut std::ffi::c_void,
|
||||||
value: *mut std::ffi::c_void,
|
value: *mut std::ffi::c_void,
|
||||||
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
// clsContextDestroyCallback, have to be called on cuDevicePrimaryCtxReset
|
||||||
dtor_cb: Option<extern "system" fn(
|
dtor_cb: Option<extern "system" fn(
|
||||||
cuda_types::cuda::CUcontext,
|
cuda_types::cuda::CUcontext,
|
||||||
*mut std::ffi::c_void,
|
*mut std::ffi::c_void,
|
||||||
*mut std::ffi::c_void,
|
*mut std::ffi::c_void,
|
||||||
)>
|
)>
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
[1] = context_local_storage_delete(
|
[1] = context_local_storage_delete(
|
||||||
@ -434,9 +434,9 @@ impl ::dark_api::cuda::CudaDarkApi for DarkApiDump {
|
|||||||
key: *mut std::ffi::c_void
|
key: *mut std::ffi::c_void
|
||||||
) -> cuda_types::cuda::CUresult,
|
) -> cuda_types::cuda::CUresult,
|
||||||
[2] = context_local_storage_get(
|
[2] = context_local_storage_get(
|
||||||
value: *mut *mut std::ffi::c_void,
|
value: *mut *mut std::ffi::c_void,
|
||||||
cu_ctx: cuda_types::cuda::CUcontext,
|
cu_ctx: cuda_types::cuda::CUcontext,
|
||||||
key: *mut std::ffi::c_void
|
key: *mut std::ffi::c_void
|
||||||
) -> cuda_types::cuda::CUresult
|
) -> cuda_types::cuda::CUresult
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,4 @@ macro_rules! unimplemented {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
cuda_macros::cufft_function_declarations!(
|
cuda_macros::cufft_function_declarations!(unimplemented);
|
||||||
unimplemented
|
|
||||||
);
|
|
||||||
|
@ -1,41 +1,39 @@
|
|||||||
use cuda_types::nvml::*;
|
use cuda_types::nvml::*;
|
||||||
use std::{ffi::CStr, ptr};
|
use std::{ffi::CStr, ptr};
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(debug_assertions))]
|
#[cfg(not(debug_assertions))]
|
||||||
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
pub(crate) fn unimplemented() -> nvmlReturn_t {
|
||||||
nvmlReturn_t::ERROR_NOT_SUPPORTED
|
nvmlReturn_t::ERROR_NOT_SUPPORTED
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn error_string(
|
pub(crate) fn error_string(_result: cuda_types::nvml::nvmlReturn_t) -> *const ::core::ffi::c_char {
|
||||||
_result: cuda_types::nvml::nvmlReturn_t,
|
c"".as_ptr()
|
||||||
) -> *const ::core::ffi::c_char {
|
}
|
||||||
c"".as_ptr()
|
|
||||||
}
|
pub(crate) fn init_v2() -> cuda_types::nvml::nvmlReturn_t {
|
||||||
|
nvmlReturn_t::SUCCESS
|
||||||
pub(crate) fn init_v2() -> cuda_types::nvml::nvmlReturn_t {
|
}
|
||||||
nvmlReturn_t::SUCCESS
|
|
||||||
}
|
const VERSION: &'static CStr = c"550.77";
|
||||||
|
|
||||||
const VERSION: &'static CStr = c"550.77";
|
pub(crate) fn system_get_driver_version(
|
||||||
|
result: *mut ::core::ffi::c_char,
|
||||||
pub(crate) fn system_get_driver_version(
|
length: ::core::ffi::c_uint,
|
||||||
result: *mut ::core::ffi::c_char,
|
) -> cuda_types::nvml::nvmlReturn_t {
|
||||||
length: ::core::ffi::c_uint,
|
if result == ptr::null_mut() {
|
||||||
) -> cuda_types::nvml::nvmlReturn_t {
|
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
|
||||||
if result == ptr::null_mut() {
|
}
|
||||||
return nvmlReturn_t::ERROR_INVALID_ARGUMENT;
|
let version = VERSION.to_bytes_with_nul();
|
||||||
}
|
let copy_length = usize::min(length as usize, version.len());
|
||||||
let version = VERSION.to_bytes_with_nul();
|
let slice = unsafe { std::slice::from_raw_parts_mut(result.cast(), copy_length) };
|
||||||
let copy_length = usize::min(length as usize, version.len());
|
slice.copy_from_slice(&version[..copy_length]);
|
||||||
let slice = unsafe { std::slice::from_raw_parts_mut(result.cast(), copy_length) };
|
if let Some(null) = slice.last_mut() {
|
||||||
slice.copy_from_slice(&version[..copy_length]);
|
*null = 0;
|
||||||
if let Some(null) = slice.last_mut() {
|
}
|
||||||
*null = 0;
|
nvmlReturn_t::SUCCESS
|
||||||
}
|
}
|
||||||
nvmlReturn_t::SUCCESS
|
|
||||||
}
|
|
||||||
|
@ -26,9 +26,5 @@ macro_rules! implemented_fn {
|
|||||||
|
|
||||||
cuda_macros::nvml_function_declarations!(
|
cuda_macros::nvml_function_declarations!(
|
||||||
unimplemented_fn,
|
unimplemented_fn,
|
||||||
implemented_fn <= [
|
implemented_fn <= [nvmlErrorString, nvmlInit_v2, nvmlSystemGetDriverVersion]
|
||||||
nvmlErrorString,
|
|
||||||
nvmlInit_v2,
|
|
||||||
nvmlSystemGetDriverVersion
|
|
||||||
]
|
|
||||||
);
|
);
|
||||||
|
@ -28,12 +28,13 @@ macro_rules! implemented {
|
|||||||
|
|
||||||
cuda_macros::cusparse_function_declarations!(
|
cuda_macros::cusparse_function_declarations!(
|
||||||
unimplemented,
|
unimplemented,
|
||||||
implemented <= [
|
implemented
|
||||||
cusparseGetErrorName,
|
<= [
|
||||||
cusparseGetErrorString,
|
cusparseGetErrorName,
|
||||||
cusparseGetMatIndexBase,
|
cusparseGetErrorString,
|
||||||
cusparseGetMatDiagType,
|
cusparseGetMatIndexBase,
|
||||||
cusparseGetMatFillMode,
|
cusparseGetMatDiagType,
|
||||||
cusparseGetMatType
|
cusparseGetMatFillMode,
|
||||||
]
|
cusparseGetMatType
|
||||||
|
]
|
||||||
);
|
);
|
||||||
|
Reference in New Issue
Block a user