Fix host code and update to CUDA 12.4 (#299)

This commit is contained in:
Andrzej Janik
2024-12-02 00:29:57 +01:00
committed by GitHub
parent 870fed4bb6
commit 7a6df9dcbf
71 changed files with 68561 additions and 56362 deletions

View File

@ -8,16 +8,28 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter
python3 \
ripgrep \
git \
ltrace
ltrace \
# required by llvm 17
lsb-release software-properties-common gnupg
ARG LLVM_VERSION=17
RUN wget https://apt.llvm.org/llvm.sh && \
chmod +x llvm.sh && \
./llvm.sh ${LLVM_VERSION}
# Feel free to change to a newer version if you have a newer verison on your host
ARG CUDA_VERSION=12-4
ARG CUDA_PKG_VERSION=12-4
# Docker <-> host driver version compatiblity is newer host <-> older docker
# We don't care about a specific driver version, so pick oldest 5XX
ARG CUDA_DRIVER=515
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# CUDA headers need it for interop
libgl-dev libegl-dev libvdpau-dev \
nvidia-utils-${CUDA_DRIVER} \
cuda-cudart-${CUDA_VERSION}
cuda-cudart-dev-${CUDA_PKG_VERSION} \
cuda-cudart-${CUDA_PKG_VERSION} \
cuda-profiler-api-${CUDA_PKG_VERSION} \
cuda-nvcc-${CUDA_PKG_VERSION}
ARG ROCM_VERSION=6.2.2
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
@ -29,9 +41,11 @@ RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
rocminfo \
rocm-gdb \
rocm-smi-lib \
hip-runtime-amd && \
rocm-llvm-dev \
hip-runtime-amd && \
hip-dev && \
echo '/opt/rocm/lib' > /etc/ld.so.conf.d/rocm.conf && \
ldconfig
ENV PATH=$PATH:/opt/rocm-6.2.2/bin
ENV PATH=$PATH:/opt/rocm-${ROCM_VERSION}/bin

View File

@ -28,7 +28,7 @@
//"hostRequirements": { "gpu": "optional" }
"customizations": {
"vscode": {
"extensions": [ "mhutchie.git-graph" ],
"extensions": [ "mhutchie.git-graph" ]
}
}
}
}

7
.gitmodules vendored
View File

@ -1,10 +1,3 @@
[submodule "ext/spirv-tools"]
path = ext/spirv-tools
url = https://github.com/KhronosGroup/SPIRV-Tools
branch = master
[submodule "ext/spirv-headers"]
path = ext/spirv-headers
url = https://github.com/KhronosGroup/SPIRV-Headers
[submodule "ext/llvm-project"]
path = ext/llvm-project
url = https://github.com/llvm/llvm-project

View File

@ -9,12 +9,8 @@ members = [
"cuda_base",
"cuda_types",
"detours-sys",
"level_zero-sys",
"level_zero",
"spirv_tools-sys",
"zluda",
"zluda_dump",
"zluda_lib",
"zluda_inject",
"zluda_redirect",
"zluda_ml",
@ -22,6 +18,7 @@ members = [
"ptx_parser",
"ptx_parser_macros",
"ptx_parser_macros_impl",
"zluda_bindgen",
]
default-members = ["zluda_lib", "zluda_ml", "zluda_inject", "zluda_redirect"]
default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect"]

View File

@ -2,11 +2,11 @@
name = "cuda_base"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[dependencies]
quote = "1.0"
syn = { version = "1.0", features = ["full", "visit-mut"] }
syn = { version = "2.0", features = ["full", "visit-mut"] }
proc-macro2 = "1.0"
rustc-hash = "1.1.0"

View File

@ -1 +0,0 @@
bindgen build/wrapper.h -o src/cuda.rs --no-partialeq "CUDA_HOST_NODE_PARAMS_st" --with-derive-eq --whitelist-function="^cu.*" --whitelist-var="^CU.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug --new-type-alias "^CUdevice_v\d+$|^CUdeviceptr_v\d+$" --must-use-type "cudaError_enum" -- -I/usr/local/cuda/include

View File

@ -1,3 +1,7 @@
#define __CUDA_API_VERSION_INTERNAL
#include <cuda.h>
#include <cudaProfiler.h>
#include <cudaGL.h>
#include <cudaEGL.h>
#include <vdpau/vdpau.h>
#include <cudaVDPAU.h>

File diff suppressed because it is too large Load Diff

View File

@ -1,110 +1,25 @@
extern crate proc_macro;
use std::collections::hash_map;
use std::iter;
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::{format_ident, quote, ToTokens};
use rustc_hash::{FxHashMap, FxHashSet};
use quote::{quote, ToTokens};
use rustc_hash::FxHashMap;
use std::iter;
use syn::parse::{Parse, ParseStream};
use syn::punctuated::Punctuated;
use syn::visit_mut::VisitMut;
use syn::{
bracketed, parse_macro_input, Abi, Fields, File, FnArg, ForeignItem, ForeignItemFn, Ident,
Item, ItemForeignMod, LitStr, PatType, Path, PathArguments, PathSegment, ReturnType, Signature,
Token, Type, TypeArray, TypePath, TypePtr,
bracketed, parse_macro_input, File, ForeignItem, ForeignItemFn, Ident, Item, Path, Signature,
Token,
};
const CUDA_RS: &'static str = include_str! {"cuda.rs"};
// This macro copies cuda.rs as-is with some changes:
// * All function declarations are filtered out
// * CUdeviceptr_v2 is redefined from `unsigned long long` to `*void`
// * `extern "C"` gets replaced by `extern "system"`
// * CUuuid_st is redefined to use uchar instead of char
#[proc_macro]
pub fn cuda_type_declarations(_: TokenStream) -> TokenStream {
let mut cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
cuda_module.items = cuda_module
.items
.into_iter()
.filter_map(|item| match item {
Item::ForeignMod(_) => None,
Item::Struct(mut struct_) => {
if "CUdeviceptr_v2" == struct_.ident.to_string() {
match &mut struct_.fields {
Fields::Unnamed(ref mut fields) => {
fields.unnamed[0].ty =
absolute_path_to_mut_ptr(&["std", "os", "raw", "c_void"])
}
_ => unreachable!(),
}
} else if "CUuuid_st" == struct_.ident.to_string() {
match &mut struct_.fields {
Fields::Named(ref mut fields) => match fields.named[0].ty {
Type::Array(TypeArray { ref mut elem, .. }) => {
*elem = Box::new(Type::Path(TypePath {
qself: None,
path: segments_to_path(&["std", "os", "raw", "c_uchar"]),
}))
}
_ => unreachable!(),
},
_ => panic!(),
}
}
Some(Item::Struct(struct_))
}
i => Some(i),
})
.collect::<Vec<_>>();
syn::visit_mut::visit_file_mut(&mut FixAbi, &mut cuda_module);
cuda_module.into_token_stream().into()
}
fn segments_to_path(path: &[&'static str]) -> Path {
let mut segments = Punctuated::new();
for ident in path {
let ident = PathSegment {
ident: Ident::new(ident, Span::call_site()),
arguments: PathArguments::None,
};
segments.push(ident);
}
Path {
leading_colon: Some(Token![::](Span::call_site())),
segments,
}
}
fn absolute_path_to_mut_ptr(path: &[&'static str]) -> Type {
Type::Ptr(TypePtr {
star_token: Token![*](Span::call_site()),
const_token: None,
mutability: Some(Token![mut](Span::call_site())),
elem: Box::new(Type::Path(TypePath {
qself: None,
path: segments_to_path(path),
})),
})
}
struct FixAbi;
impl VisitMut for FixAbi {
fn visit_abi_mut(&mut self, i: &mut Abi) {
if let Some(ref mut name) = i.name {
*name = LitStr::new("system", Span::call_site());
}
}
}
// This macro accepts following arguments:
// * `type_path`: path to the module with type definitions (in the module tree)
// * `normal_macro`: ident for a normal macro
// * `override_macro`: ident for an override macro
// * `override_fns`: list of override functions
// * zero or more:
// * `override_macro`: ident for an override macro
// * `override_fns`: list of override functions
// Then macro goes through every function in rust.rs, and for every fn `foo`:
// * if `foo` is contained in `override_fns` then pass it into `override_macro`
// * if `foo` is not contained in `override_fns` pass it to `normal_macro`
@ -117,390 +32,191 @@ impl VisitMut for FixAbi {
#[proc_macro]
pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
let input = parse_macro_input!(tokens as FnDeclInput);
let cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
let override_fns = input
.override_fns
.iter()
.map(ToString::to_string)
.collect::<FxHashSet<_>>();
let (normal_macro_args, override_macro_args): (Vec<_>, Vec<_>) = cuda_module
.items
.into_iter()
.filter_map(|item| match item {
Item::ForeignMod(ItemForeignMod { mut items, .. }) => match items.pop().unwrap() {
ForeignItem::Fn(ForeignItemFn {
sig:
Signature {
ident,
inputs,
output,
..
},
..
}) => {
let use_normal_macro = !override_fns.contains(&ident.to_string());
let inputs = inputs
.into_iter()
.map(|fn_arg| match fn_arg {
FnArg::Typed(mut pat_type) => {
pat_type.ty =
prepend_cuda_path_to_type(&input.type_path, pat_type.ty);
FnArg::Typed(pat_type)
}
_ => unreachable!(),
})
.collect::<Punctuated<_, Token![,]>>();
let output = match output {
ReturnType::Type(_, type_) => type_,
ReturnType::Default => unreachable!(),
};
let type_path = input.type_path.clone();
Some((
quote! {
"system" fn #ident(#inputs) -> #type_path :: #output
},
use_normal_macro,
))
}
_ => unreachable!(),
},
_ => None,
})
.partition(|(_, use_normal_macro)| *use_normal_macro);
let mut result = proc_macro2::TokenStream::new();
if !normal_macro_args.is_empty() {
let punctuated_normal_macro_args = to_punctuated::<Token![;]>(normal_macro_args);
let macro_ = &input.normal_macro;
result.extend(iter::once(quote! {
#macro_ ! (#punctuated_normal_macro_args);
}));
let mut choose_macro = ChooseMacro::new(input);
let mut cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
extern_
} else {
unreachable!()
};
let abi = extern_.abi.name;
for mut item in extern_.items {
if let ForeignItem::Fn(ForeignItemFn {
sig: Signature { ref ident, .. },
ref mut attrs,
..
}) = item
{
*attrs = Vec::new();
choose_macro.add(ident, quote! { #abi #item });
} else {
unreachable!()
}
}
if !override_macro_args.is_empty() {
let punctuated_override_macro_args = to_punctuated::<Token![;]>(override_macro_args);
let macro_ = &input.override_macro;
result.extend(iter::once(quote! {
#macro_ ! (#punctuated_override_macro_args);
}));
let mut result = proc_macro2::TokenStream::new();
for (path, items) in
iter::once(choose_macro.default).chain(choose_macro.override_sets.into_iter())
{
if items.is_empty() {
continue;
}
quote! {
#path ! { #(#items)* }
}
.to_tokens(&mut result);
}
result.into()
}
fn to_punctuated<P: ToTokens + Default>(
elms: Vec<(proc_macro2::TokenStream, bool)>,
) -> proc_macro2::TokenStream {
let mut collection = Punctuated::<proc_macro2::TokenStream, P>::new();
collection.extend(elms.into_iter().map(|(token_stream, _)| token_stream));
collection.into_token_stream()
}
fn prepend_cuda_path_to_type(base_path: &Path, type_: Box<Type>) -> Box<Type> {
match *type_ {
Type::Path(mut type_path) => {
type_path.path = prepend_cuda_path_to_path(base_path, type_path.path);
Box::new(Type::Path(type_path))
}
Type::Ptr(mut type_ptr) => {
type_ptr.elem = prepend_cuda_path_to_type(base_path, type_ptr.elem);
Box::new(Type::Ptr(type_ptr))
}
_ => unreachable!(),
}
}
fn prepend_cuda_path_to_path(base_path: &Path, path: Path) -> Path {
if path.leading_colon.is_some() {
return path;
}
if path.segments.len() == 1 {
let ident = path.segments[0].ident.to_string();
if ident.starts_with("CU")
|| ident.starts_with("cu")
|| ident.starts_with("GL")
|| ident == "HGPUNV"
{
let mut base_path = base_path.clone();
base_path.segments.extend(path.segments);
return base_path;
}
}
path
}
struct FnDeclInput {
type_path: Path,
normal_macro: Path,
override_macro: Path,
override_fns: Punctuated<Ident, Token![,]>,
overrides: Punctuated<OverrideMacro, Token![,]>,
}
impl Parse for FnDeclInput {
fn parse(input: ParseStream) -> syn::Result<Self> {
let type_path = input.parse::<Path>()?;
input.parse::<Token![,]>()?;
let normal_macro = input.parse::<Path>()?;
input.parse::<Token![,]>()?;
let override_macro = input.parse::<Path>()?;
input.parse::<Token![,]>()?;
let override_fns_content;
bracketed!(override_fns_content in input);
let override_fns = override_fns_content.parse_terminated(Ident::parse)?;
let overrides = if input.is_empty() {
Punctuated::new()
} else {
input.parse::<Token![,]>()?;
input.parse_terminated(OverrideMacro::parse, Token![,])?
};
Ok(Self {
type_path,
normal_macro,
override_macro,
override_fns,
overrides,
})
}
}
// This trait accepts following parameters:
// * `type_path`: path to the module with type definitions (in the module tree)
// * `trait_`: name of the trait to be derived
// * `ignore_types`: bracketed list of types to ignore
// * `ignore_fns`: bracketed list of fns to ignore
#[proc_macro]
pub fn cuda_derive_display_trait(tokens: TokenStream) -> TokenStream {
let input = parse_macro_input!(tokens as DeriveDisplayInput);
let cuda_module = syn::parse_str::<File>(CUDA_RS).unwrap();
let mut derive_state = DeriveDisplayState::new(input);
cuda_module
.items
.into_iter()
.filter_map(|i| cuda_derive_display_trait_for_item(&mut derive_state, i))
.collect::<proc_macro2::TokenStream>()
.into()
struct OverrideMacro {
macro_: Path,
functions: Punctuated<Ident, Token![,]>,
}
fn cuda_derive_display_trait_for_item(
state: &mut DeriveDisplayState,
item: Item,
) -> Option<proc_macro2::TokenStream> {
let path_prefix = &state.type_path;
let path_prefix_iter = iter::repeat(&path_prefix);
let trait_ = &state.trait_;
let trait_iter = iter::repeat(&state.trait_);
match item {
Item::Const(_) => None,
Item::ForeignMod(ItemForeignMod { mut items, .. }) => match items.pop().unwrap() {
ForeignItem::Fn(ForeignItemFn {
sig: Signature { ident, inputs, .. },
..
}) => {
if state.ignore_fns.contains(&ident) {
return None;
}
let inputs = inputs
.into_iter()
.map(|fn_arg| match fn_arg {
FnArg::Typed(mut pat_type) => {
pat_type.ty = prepend_cuda_path_to_type(path_prefix, pat_type.ty);
FnArg::Typed(pat_type)
}
_ => unreachable!(),
})
.collect::<Vec<_>>();
let inputs_iter = inputs.iter();
let mut arg_name_iter = inputs.iter().map(|fn_arg| match fn_arg {
FnArg::Typed(PatType { pat, .. }) => pat,
_ => unreachable!(),
});
let fn_name = format_ident!("write_{}", ident);
let original_fn_name = ident.to_string();
Some(match arg_name_iter.next() {
Some(first_arg_name) => quote! {
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized), #(#inputs_iter,)*) -> std::io::Result<()> {
writer.write_all(concat!("(", stringify!(#first_arg_name), ": ").as_bytes())?;
let mut arg_idx = 0usize;
CudaDisplay::write(&#first_arg_name, #original_fn_name, arg_idx, writer)?;
#(
writer.write_all(b", ")?;
writer.write_all(concat!(stringify!(#arg_name_iter), ": ").as_bytes())?;
CudaDisplay::write(&#arg_name_iter, #original_fn_name, arg_idx, writer)?;
arg_idx += 1;
)*
writer.write_all(b")")
}
},
None => quote! {
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
writer.write_all(b"()")
}
},
})
}
_ => unreachable!(),
},
Item::Impl(mut item_impl) => {
let enum_ = match *(item_impl.self_ty) {
Type::Path(mut path) => path.path.segments.pop().unwrap().into_value().ident,
_ => unreachable!(),
};
let variant_ = match item_impl.items.pop().unwrap() {
syn::ImplItem::Const(item_const) => item_const.ident,
_ => unreachable!(),
};
state.record_enum_variant(enum_, variant_);
None
}
Item::Struct(item_struct) => {
let item_struct_name = item_struct.ident.to_string();
if state.ignore_types.contains(&item_struct.ident) {
return None;
}
if item_struct_name.ends_with("_enum") {
let enum_ = &item_struct.ident;
let enum_iter = iter::repeat(&item_struct.ident);
let variants = state.enums.get(&item_struct.ident).unwrap().iter();
Some(quote! {
impl #trait_ for #path_prefix :: #enum_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
match self {
#(& #path_prefix_iter :: #enum_iter :: #variants => writer.write_all(stringify!(#variants).as_bytes()),)*
_ => write!(writer, "{}", self.0)
}
}
}
})
} else {
let struct_ = &item_struct.ident;
let (first_field, rest_of_fields) = match item_struct.fields {
Fields::Named(fields) => {
let mut all_idents = fields.named.into_iter().filter_map(|f| {
let f_ident = f.ident.unwrap();
let name = f_ident.to_string();
if name.starts_with("reserved") || name == "_unused" {
None
} else {
Some(f_ident)
}
});
let first = match all_idents.next() {
Some(f) => f,
None => return None,
};
(first, all_idents)
}
_ => return None,
};
Some(quote! {
impl #trait_ for #path_prefix :: #struct_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(#first_field), ": ").as_bytes())?;
#trait_::write(&self.#first_field, "", 0, writer)?;
#(
writer.write_all(concat!(", ", stringify!(#rest_of_fields), ": ").as_bytes())?;
#trait_iter::write(&self.#rest_of_fields, "", 0, writer)?;
)*
writer.write_all(b" }")
}
}
})
}
}
Item::Type(item_type) => {
if state.ignore_types.contains(&item_type.ident) {
return None;
};
match *(item_type.ty) {
Type::Ptr(_) => {
let type_ = item_type.ident;
Some(quote! {
impl #trait_ for #path_prefix :: #type_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", *self)
}
}
})
}
Type::Path(type_path) => {
if type_path.path.leading_colon.is_some() {
let option_seg = type_path.path.segments.last().unwrap();
if option_seg.ident == "Option" {
match &option_seg.arguments {
PathArguments::AngleBracketed(generic) => match generic.args[0] {
syn::GenericArgument::Type(Type::BareFn(_)) => {
let type_ = &item_type.ident;
return Some(quote! {
impl #trait_ for #path_prefix :: #type_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", unsafe { std::mem::transmute::<#path_prefix :: #type_, *mut ::std::ffi::c_void>(*self) })
}
}
});
}
_ => unreachable!(),
},
_ => unreachable!(),
}
}
}
None
}
_ => unreachable!(),
}
}
Item::Union(_) => None,
Item::Use(_) => None,
_ => unreachable!(),
}
}
struct DeriveDisplayState {
type_path: Path,
trait_: Path,
ignore_types: FxHashSet<Ident>,
ignore_fns: FxHashSet<Ident>,
enums: FxHashMap<Ident, Vec<Ident>>,
}
impl DeriveDisplayState {
fn new(input: DeriveDisplayInput) -> Self {
DeriveDisplayState {
type_path: input.type_path,
trait_: input.trait_,
ignore_types: input.ignore_types.into_iter().collect(),
ignore_fns: input.ignore_fns.into_iter().collect(),
enums: Default::default(),
}
}
fn record_enum_variant(&mut self, enum_: Ident, variant: Ident) {
match self.enums.entry(enum_) {
hash_map::Entry::Occupied(mut entry) => {
entry.get_mut().push(variant);
}
hash_map::Entry::Vacant(entry) => {
entry.insert(vec![variant]);
}
}
}
}
struct DeriveDisplayInput {
type_path: Path,
trait_: Path,
ignore_types: Punctuated<Ident, Token![,]>,
ignore_fns: Punctuated<Ident, Token![,]>,
}
impl Parse for DeriveDisplayInput {
impl Parse for OverrideMacro {
fn parse(input: ParseStream) -> syn::Result<Self> {
let type_path = input.parse::<Path>()?;
input.parse::<Token![,]>()?;
let trait_ = input.parse::<Path>()?;
input.parse::<Token![,]>()?;
let ignore_types_buffer;
bracketed!(ignore_types_buffer in input);
let ignore_types = ignore_types_buffer.parse_terminated(Ident::parse)?;
input.parse::<Token![,]>()?;
let ignore_fns_buffer;
bracketed!(ignore_fns_buffer in input);
let ignore_fns = ignore_fns_buffer.parse_terminated(Ident::parse)?;
Ok(Self {
type_path,
trait_,
ignore_types,
ignore_fns,
})
let macro_ = input.parse::<Path>()?;
input.parse::<Token![<=]>()?;
let functions_content;
bracketed!(functions_content in input);
let functions = functions_content.parse_terminated(Ident::parse, Token![,])?;
Ok(Self { macro_, functions })
}
}
struct ChooseMacro {
default: (Path, Vec<proc_macro2::TokenStream>),
override_lookup: FxHashMap<Ident, Path>,
override_sets: FxHashMap<Path, Vec<proc_macro2::TokenStream>>,
}
impl ChooseMacro {
fn new(input: FnDeclInput) -> Self {
let mut override_lookup = FxHashMap::default();
let mut override_sets = FxHashMap::default();
for OverrideMacro { macro_, functions } in input.overrides {
for ident in functions {
override_lookup.insert(ident, macro_.clone());
override_sets.insert(macro_.clone(), Vec::new());
}
}
Self {
default: (input.normal_macro, Vec::new()),
override_lookup,
override_sets,
}
}
fn add(&mut self, ident: &Ident, tokens: proc_macro2::TokenStream) {
match self.override_lookup.get(ident) {
Some(override_macro) => {
self.override_sets
.get_mut(override_macro)
.unwrap()
.push(tokens);
}
None => self.default.1.push(tokens),
}
}
}
// For some reason prettyplease will append trailing comma *only*
// if there are two or more arguments
struct FixFnSignatures;
impl VisitMut for FixFnSignatures {
fn visit_signature_mut(&mut self, s: &mut syn::Signature) {
s.inputs.pop_punct();
}
}
const MODULES: &[&str] = &[
"context", "device", "driver", "function", "link", "memory", "module", "pointer",
];
#[proc_macro]
pub fn cuda_normalize_fn(tokens: TokenStream) -> TokenStream {
let mut path = parse_macro_input!(tokens as syn::Path);
let fn_ = path
.segments
.pop()
.unwrap()
.into_tuple()
.0
.ident
.to_string();
let already_has_module = MODULES.contains(&&*path.segments.last().unwrap().ident.to_string());
let segments: Vec<String> = split(&fn_[2..]); // skip "cu"
let fn_path = join(segments, !already_has_module);
quote! {
#path #fn_path
}
.into()
}
fn split(fn_: &str) -> Vec<String> {
let mut result = Vec::new();
for c in fn_.chars() {
if c.is_ascii_uppercase() {
result.push(c.to_ascii_lowercase().to_string());
} else {
result.last_mut().unwrap().push(c);
}
}
result
}
fn join(fn_: Vec<String>, find_module: bool) -> Punctuated<Ident, Token![::]> {
fn full_form(segment: &str) -> Option<&[&str]> {
Some(match segment {
"ctx" => &["context"],
"func" => &["function"],
"mem" => &["memory"],
"memcpy" => &["memory", "copy"],
_ => return None,
})
}
let mut normalized: Vec<&str> = Vec::new();
for segment in fn_.iter() {
match full_form(segment) {
Some(segments) => normalized.extend(segments.into_iter()),
None => normalized.push(&*segment),
}
}
if !find_module {
return [Ident::new(&normalized.join("_"), Span::call_site())]
.into_iter()
.collect();
}
if !MODULES.contains(&normalized[0]) {
let mut globalized = vec!["driver"];
globalized.extend(normalized);
normalized = globalized;
}
let (module, path) = normalized.split_first().unwrap();
let path = path.join("_");
[module, &&*path]
.into_iter()
.map(|s| Ident::new(s, Span::call_site()))
.collect()
}

View File

@ -6,3 +6,4 @@ edition = "2018"
[dependencies]
cuda_base = { path = "../cuda_base" }
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
bindgen --rust-target 1.77 /opt/rocm/include/hip/hip_runtime_api.h -o hip_runtime_api.rs --no-layout-tests --default-enum-style=newtype --allowlist-function "hip.*" --allowlist-type "hip.*" --no-derive-debug --must-use-type hipError_t --new-type-alias "^hipDeviceptr_t$" --allowlist-var "^hip.*$" -- -I/opt/rocm/include -D__HIP_PLATFORM_AMD__

View File

@ -1,2 +0,0 @@
#define __HIP_PLATFORM_HCC__
#include <hip/hip_runtime_api.h>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1
ext/spirv-headers vendored

@ -1 +0,0 @@
Subproject commit 308bd07424350a6000f35a77b5f85cd4f3da319e

1
ext/spirv-tools vendored

@ -1 +0,0 @@
Subproject commit e128ab0d624ce7beb08eb9656bb260c597a46d0a

View File

@ -1,8 +0,0 @@
[package]
name = "level_zero-sys"
version = "1.0.4"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
links = "ze_loader"
[lib]

View File

@ -1,4 +0,0 @@
sed 's/^typedef uint32_t ze_.*flags_t;$//g' include/ze_api.h > include/level_zero/ze_api.h
sed -i -r 's/ze_(.*)_flag_t/ze_\1_flags_t/g' include/level_zero/ze_api.h
bindgen --with-derive-default --no-default ".*format_t" --no-default ".*fd_t" --no-default ".*constants_t" --no-default ".*handle_t" --no-default ".*desc_t" --no-default ".*params_t" --size_t-is-usize --default-enum-style=newtype --bitfield-enum ".*flags_t" --whitelist-function "ze.*" --whitelist-type "ze.*" include/level_zero/ze_api.h -o src/ze_api.rs -- -Iinclude
sed -i 's/pub struct _ze_result_t/#[must_use]\npub struct _ze_result_t/g' src/ze_api.rs

View File

@ -1,17 +0,0 @@
use env::VarError;
use std::{env, path::PathBuf};
fn main() -> Result<(), VarError> {
println!("cargo:rustc-link-lib=dylib=ze_loader");
if cfg!(windows) {
let env = env::var("CARGO_CFG_TARGET_ENV")?;
if env == "msvc" {
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
path.push("lib");
println!("cargo:rustc-link-search=native={}", path.display());
} else {
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
};
}
Ok(())
}

View File

@ -1 +0,0 @@
ze_api.h

File diff suppressed because it is too large Load Diff

View File

@ -1,316 +0,0 @@
EXPORTS
zeCommandListAppendBarrier
zeCommandListAppendEventReset
zeCommandListAppendImageCopy
zeCommandListAppendImageCopyFromMemory
zeCommandListAppendImageCopyRegion
zeCommandListAppendImageCopyToMemory
zeCommandListAppendLaunchCooperativeKernel
zeCommandListAppendLaunchKernel
zeCommandListAppendLaunchKernelIndirect
zeCommandListAppendLaunchMultipleKernelsIndirect
zeCommandListAppendMemAdvise
zeCommandListAppendMemoryCopy
zeCommandListAppendMemoryCopyFromContext
zeCommandListAppendMemoryCopyRegion
zeCommandListAppendMemoryFill
zeCommandListAppendMemoryPrefetch
zeCommandListAppendMemoryRangesBarrier
zeCommandListAppendQueryKernelTimestamps
zeCommandListAppendSignalEvent
zeCommandListAppendWaitOnEvents
zeCommandListAppendWriteGlobalTimestamp
zeCommandListClose
zeCommandListCreate
zeCommandListCreateImmediate
zeCommandListDestroy
zeCommandListReset
zeCommandQueueCreate
zeCommandQueueDestroy
zeCommandQueueExecuteCommandLists
zeCommandQueueSynchronize
zeContextCreate
zeContextCreateEx
zeContextDestroy
zeContextEvictImage
zeContextEvictMemory
zeContextGetStatus
zeContextMakeImageResident
zeContextMakeMemoryResident
zeContextSystemBarrier
zeDeviceCanAccessPeer
zeDeviceGet
zeDeviceGetCacheProperties
zeDeviceGetCommandQueueGroupProperties
zeDeviceGetComputeProperties
zeDeviceGetExternalMemoryProperties
zeDeviceGetGlobalTimestamps
zeDeviceGetImageProperties
zeDeviceGetMemoryAccessProperties
zeDeviceGetMemoryProperties
zeDeviceGetModuleProperties
zeDeviceGetP2PProperties
zeDeviceGetProperties
zeDeviceGetStatus
zeDeviceGetSubDevices
zeDriverGet
zeDriverGetApiVersion
zeDriverGetExtensionFunctionAddress
zeDriverGetExtensionProperties
zeDriverGetIpcProperties
zeDriverGetProperties
zeEventCreate
zeEventDestroy
zeEventHostReset
zeEventHostSignal
zeEventHostSynchronize
zeEventPoolCloseIpcHandle
zeEventPoolCreate
zeEventPoolDestroy
zeEventPoolGetIpcHandle
zeEventPoolOpenIpcHandle
zeEventQueryKernelTimestamp
zeEventQueryStatus
zeFenceCreate
zeFenceDestroy
zeFenceHostSynchronize
zeFenceQueryStatus
zeFenceReset
zeGetCommandListProcAddrTable
zeGetCommandQueueProcAddrTable
zeGetContextProcAddrTable
zeGetDeviceProcAddrTable
zeGetDriverProcAddrTable
zeGetEventPoolProcAddrTable
zeGetEventProcAddrTable
zeGetFenceProcAddrTable
zeGetGlobalProcAddrTable
zeGetImageProcAddrTable
zeGetKernelExpProcAddrTable
zeGetKernelProcAddrTable
zeGetMemProcAddrTable
zeGetModuleBuildLogProcAddrTable
zeGetModuleProcAddrTable
zeGetPhysicalMemProcAddrTable
zeGetSamplerProcAddrTable
zeGetVirtualMemProcAddrTable
zeImageCreate
zeImageDestroy
zeImageGetProperties
zeInit
zeKernelCreate
zeKernelDestroy
zeKernelGetIndirectAccess
zeKernelGetName
zeKernelGetProperties
zeKernelGetSourceAttributes
zeKernelSetArgumentValue
zeKernelSetCacheConfig
zeKernelSetGlobalOffsetExp
zeKernelSetGroupSize
zeKernelSetIndirectAccess
zeKernelSuggestGroupSize
zeKernelSuggestMaxCooperativeGroupCount
zeLoaderInit
zeMemAllocDevice
zeMemAllocHost
zeMemAllocShared
zeMemCloseIpcHandle
zeMemFree
zeMemGetAddressRange
zeMemGetAllocProperties
zeMemGetIpcHandle
zeMemOpenIpcHandle
zeModuleBuildLogDestroy
zeModuleBuildLogGetString
zeModuleCreate
zeModuleDestroy
zeModuleDynamicLink
zeModuleGetFunctionPointer
zeModuleGetGlobalPointer
zeModuleGetKernelNames
zeModuleGetNativeBinary
zeModuleGetProperties
zePhysicalMemCreate
zePhysicalMemDestroy
zeSamplerCreate
zeSamplerDestroy
zeVirtualMemFree
zeVirtualMemGetAccessAttribute
zeVirtualMemMap
zeVirtualMemQueryPageSize
zeVirtualMemReserve
zeVirtualMemSetAccessAttribute
zeVirtualMemUnmap
zelGetTracerApiProcAddrTable
zelTracerCreate
zelTracerDestroy
zelTracerSetEnabled
zelTracerSetEpilogues
zelTracerSetPrologues
zesDeviceEnumDiagnosticTestSuites
zesDeviceEnumEngineGroups
zesDeviceEnumFabricPorts
zesDeviceEnumFans
zesDeviceEnumFirmwares
zesDeviceEnumFrequencyDomains
zesDeviceEnumLeds
zesDeviceEnumMemoryModules
zesDeviceEnumPerformanceFactorDomains
zesDeviceEnumPowerDomains
zesDeviceEnumPsus
zesDeviceEnumRasErrorSets
zesDeviceEnumSchedulers
zesDeviceEnumStandbyDomains
zesDeviceEnumTemperatureSensors
zesDeviceEventRegister
zesDeviceGetProperties
zesDeviceGetState
zesDevicePciGetBars
zesDevicePciGetProperties
zesDevicePciGetState
zesDevicePciGetStats
zesDeviceProcessesGetState
zesDeviceReset
zesDiagnosticsGetProperties
zesDiagnosticsGetTests
zesDiagnosticsRunTests
zesDriverEventListen
zesDriverEventListenEx
zesEngineGetActivity
zesEngineGetProperties
zesFabricPortGetConfig
zesFabricPortGetLinkType
zesFabricPortGetProperties
zesFabricPortGetState
zesFabricPortGetThroughput
zesFabricPortSetConfig
zesFanGetConfig
zesFanGetProperties
zesFanGetState
zesFanSetDefaultMode
zesFanSetFixedSpeedMode
zesFanSetSpeedTableMode
zesFirmwareFlash
zesFirmwareGetProperties
zesFrequencyGetAvailableClocks
zesFrequencyGetProperties
zesFrequencyGetRange
zesFrequencyGetState
zesFrequencyGetThrottleTime
zesFrequencyOcGetCapabilities
zesFrequencyOcGetFrequencyTarget
zesFrequencyOcGetIccMax
zesFrequencyOcGetMode
zesFrequencyOcGetTjMax
zesFrequencyOcGetVoltageTarget
zesFrequencyOcSetFrequencyTarget
zesFrequencyOcSetIccMax
zesFrequencyOcSetMode
zesFrequencyOcSetTjMax
zesFrequencyOcSetVoltageTarget
zesFrequencySetRange
zesGetDeviceProcAddrTable
zesGetDiagnosticsProcAddrTable
zesGetDriverProcAddrTable
zesGetEngineProcAddrTable
zesGetFabricPortProcAddrTable
zesGetFanProcAddrTable
zesGetFirmwareProcAddrTable
zesGetFrequencyProcAddrTable
zesGetLedProcAddrTable
zesGetMemoryProcAddrTable
zesGetPerformanceFactorProcAddrTable
zesGetPowerProcAddrTable
zesGetPsuProcAddrTable
zesGetRasProcAddrTable
zesGetSchedulerProcAddrTable
zesGetStandbyProcAddrTable
zesGetTemperatureProcAddrTable
zesLedGetProperties
zesLedGetState
zesLedSetColor
zesLedSetState
zesMemoryGetBandwidth
zesMemoryGetProperties
zesMemoryGetState
zesPerformanceFactorGetConfig
zesPerformanceFactorGetProperties
zesPerformanceFactorSetConfig
zesPowerGetEnergyCounter
zesPowerGetEnergyThreshold
zesPowerGetLimits
zesPowerGetProperties
zesPowerSetEnergyThreshold
zesPowerSetLimits
zesPsuGetProperties
zesPsuGetState
zesRasGetConfig
zesRasGetProperties
zesRasGetState
zesRasSetConfig
zesSchedulerGetCurrentMode
zesSchedulerGetProperties
zesSchedulerGetTimeoutModeProperties
zesSchedulerGetTimesliceModeProperties
zesSchedulerSetComputeUnitDebugMode
zesSchedulerSetExclusiveMode
zesSchedulerSetTimeoutMode
zesSchedulerSetTimesliceMode
zesStandbyGetMode
zesStandbyGetProperties
zesStandbySetMode
zesTemperatureGetConfig
zesTemperatureGetProperties
zesTemperatureGetState
zesTemperatureSetConfig
zetCommandListAppendMetricMemoryBarrier
zetCommandListAppendMetricQueryBegin
zetCommandListAppendMetricQueryEnd
zetCommandListAppendMetricStreamerMarker
zetContextActivateMetricGroups
zetDebugAcknowledgeEvent
zetDebugAttach
zetDebugDetach
zetDebugGetRegisterSetProperties
zetDebugInterrupt
zetDebugReadEvent
zetDebugReadMemory
zetDebugReadRegisters
zetDebugResume
zetDebugWriteMemory
zetDebugWriteRegisters
zetDeviceGetDebugProperties
zetGetCommandListProcAddrTable
zetGetContextProcAddrTable
zetGetDebugProcAddrTable
zetGetDeviceProcAddrTable
zetGetKernelProcAddrTable
zetGetMetricGroupProcAddrTable
zetGetMetricProcAddrTable
zetGetMetricQueryPoolProcAddrTable
zetGetMetricQueryProcAddrTable
zetGetMetricStreamerProcAddrTable
zetGetModuleProcAddrTable
zetGetTracerExpProcAddrTable
zetKernelGetProfileInfo
zetMetricGet
zetMetricGetProperties
zetMetricGroupCalculateMetricValues
zetMetricGroupGet
zetMetricGroupGetProperties
zetMetricQueryCreate
zetMetricQueryDestroy
zetMetricQueryGetData
zetMetricQueryPoolCreate
zetMetricQueryPoolDestroy
zetMetricQueryReset
zetMetricStreamerClose
zetMetricStreamerOpen
zetMetricStreamerReadData
zetModuleGetDebugInfo
zetTracerExpCreate
zetTracerExpDestroy
zetTracerExpSetEnabled
zetTracerExpSetEpilogues
zetTracerExpSetPrologues

Binary file not shown.

View File

@ -1,3 +0,0 @@
#![allow(warnings)]
pub mod ze_api;
pub use ze_api::*;

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +0,0 @@
[package]
name = "level_zero"
version = "0.1.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
[lib]
[dependencies]
level_zero-sys = { path = "../level_zero-sys" }
[dependencies.ocl-core]
version = "0.11"
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]

View File

@ -1 +0,0 @@
More ergonomic bindings for oneAPI Level Zero

View File

@ -1,4 +0,0 @@
pub use level_zero_sys as sys;
pub mod ze;
pub use ze::*;

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,10 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <llvm-c/Core.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Type.h>
#include <llvm/IR/Instructions.h>
#pragma GCC diagnostic pop
using namespace llvm;
@ -189,7 +192,8 @@ void LLVMZludaBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
auto builder = llvm::unwrap(B);
LLVMContext &context = builder->getContext();
builder->CreateFence(mapFromLLVMOrdering(Ordering),
context.getOrInsertSyncScopeID(scope));
context.getOrInsertSyncScopeID(scope),
Name);
}
LLVM_C_EXTERN_C_END

Binary file not shown.

View File

@ -25,16 +25,16 @@ extern "C"
return (uint32_t)__ockl_get_local_size(member);
}
size_t __ockl_get_global_id(uint32_t) __device__;
size_t __ockl_get_group_id(uint32_t) __device__;
uint32_t FUNC(sreg_ctaid)(uint8_t member)
{
return (uint32_t)__ockl_get_global_id(member);
return (uint32_t)__ockl_get_group_id(member);
}
size_t __ockl_get_global_size(uint32_t) __device__;
size_t __ockl_get_num_groups(uint32_t) __device__;
uint32_t FUNC(sreg_nctaid)(uint8_t member)
{
return (uint32_t)__ockl_get_global_size(member);
return (uint32_t)__ockl_get_num_groups(member);
}
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));

View File

@ -284,20 +284,40 @@ fn immediate_value<'a, 'input>(stream: &mut PtxParser<'a, 'input>) -> PResult<as
.parse_next(stream)
}
pub fn parse_module_unchecked<'input>(text: &'input str) -> Option<ast::Module<'input>> {
let input = lex_with_span(text).ok()?;
let mut errors = Vec::new();
let state = PtxParserState::new(text, &mut errors);
let parser = PtxParser {
state,
input: &input[..],
pub fn parse_for_errors<'input>(text: &'input str) -> Vec<PtxError> {
let (tokens, mut errors) = lex_with_span_unchecked(text);
let parse_result = {
let state = PtxParserState::new(text, &mut errors);
let parser = PtxParser {
state,
input: &tokens[..],
};
module
.parse(parser)
.map_err(|err| PtxError::Parser(err.into_inner()))
};
let parsing_result = module.parse(parser).ok();
if !errors.is_empty() {
None
} else {
parsing_result
match parse_result {
Ok(_) => {}
Err(err) => {
errors.push(err);
}
}
errors
}
fn lex_with_span_unchecked<'input>(
text: &'input str,
) -> (Vec<(Token<'input>, logos::Span)>, Vec<PtxError>) {
let lexer = Token::lexer(text);
let mut result = Vec::new();
let mut errors = Vec::new();
for (token, span) in lexer.spanned() {
match token {
Ok(t) => result.push((t, span)),
Err(err) => errors.push(PtxError::Lexer { source: err }),
}
}
(result, errors)
}
pub fn parse_module_checked<'input>(
@ -342,17 +362,6 @@ pub fn parse_module_checked<'input>(
}
}
fn lex_with_span<'input>(
text: &'input str,
) -> Result<Vec<(Token<'input>, logos::Span)>, TokenError> {
let lexer = Token::lexer(text);
let mut result = Vec::new();
for (token, span) in lexer.spanned() {
result.push((token?, span));
}
Ok(result)
}
fn module<'a, 'input>(stream: &mut PtxParser<'a, 'input>) -> PResult<ast::Module<'input>> {
(
version,

View File

@ -1,10 +0,0 @@
[package]
name = "spirv_tools-sys"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
[lib]
[build-dependencies]
cmake = "0.1"

View File

@ -1 +0,0 @@
bindgen --whitelist-type="spv.*" --whitelist-function="spv.*" --size_t-is-usize --default-enum-style=rust --bitfield-enum="spv_text_to_binary_options_t|spv_binary_to_text_options_t" ../ext/SPIRV-Tools/include/spirv-tools/libspirv.h -o src/spirv_tools.rs

View File

@ -1,28 +0,0 @@
extern crate cmake;
use cmake::Config;
use std::{env::VarError, path::PathBuf};
fn main() -> Result<(), VarError> {
let root_path = std::env::var("CARGO_MANIFEST_DIR")?;
let mut headers_path = PathBuf::new();
headers_path.push(root_path);
headers_path.push("../ext/spirv-headers");
let spirv_tools_dir = Config::new("../ext/spirv-tools")
.always_configure(false)
.define("SPIRV-Headers_SOURCE_DIR", headers_path)
.define("SPIRV_SKIP_EXECUTABLES", "ON")
.define("SPIRV_SKIP_TESTS", "ON")
.build();
println!(
"cargo:rustc-link-search=native={}/bin",
spirv_tools_dir.display()
);
println!(
"cargo:rustc-link-search=native={}/lib",
spirv_tools_dir.display()
);
// dynamic linking to avoid linking to C++ runtime
println!("cargo:rustc-link-lib=dylib=SPIRV-Tools-shared");
Ok(())
}

View File

@ -1,3 +0,0 @@
#[allow(warnings)]
mod spirv_tools;
pub use spirv_tools::*;

View File

@ -1,972 +0,0 @@
/* automatically generated by rust-bindgen 0.54.1 */
pub type __uint16_t = ::std::os::raw::c_ushort;
pub type __uint32_t = ::std::os::raw::c_uint;
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_result_t {
SPV_SUCCESS = 0,
SPV_UNSUPPORTED = 1,
SPV_END_OF_STREAM = 2,
SPV_WARNING = 3,
SPV_FAILED_MATCH = 4,
SPV_REQUESTED_TERMINATION = 5,
SPV_ERROR_INTERNAL = -1,
SPV_ERROR_OUT_OF_MEMORY = -2,
SPV_ERROR_INVALID_POINTER = -3,
SPV_ERROR_INVALID_BINARY = -4,
SPV_ERROR_INVALID_TEXT = -5,
SPV_ERROR_INVALID_TABLE = -6,
SPV_ERROR_INVALID_VALUE = -7,
SPV_ERROR_INVALID_DIAGNOSTIC = -8,
SPV_ERROR_INVALID_LOOKUP = -9,
SPV_ERROR_INVALID_ID = -10,
SPV_ERROR_INVALID_CFG = -11,
SPV_ERROR_INVALID_LAYOUT = -12,
SPV_ERROR_INVALID_CAPABILITY = -13,
SPV_ERROR_INVALID_DATA = -14,
SPV_ERROR_MISSING_EXTENSION = -15,
SPV_ERROR_WRONG_VERSION = -16,
_spv_result_t = 2147483647,
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_message_level_t {
SPV_MSG_FATAL = 0,
SPV_MSG_INTERNAL_ERROR = 1,
SPV_MSG_ERROR = 2,
SPV_MSG_WARNING = 3,
SPV_MSG_INFO = 4,
SPV_MSG_DEBUG = 5,
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_endianness_t {
SPV_ENDIANNESS_LITTLE = 0,
SPV_ENDIANNESS_BIG = 1,
_spv_endianness_t = 2147483647,
}
impl spv_operand_type_t {
pub const SPV_OPERAND_TYPE_FIRST_OPTIONAL_TYPE: spv_operand_type_t =
spv_operand_type_t::SPV_OPERAND_TYPE_OPTIONAL_ID;
}
impl spv_operand_type_t {
pub const SPV_OPERAND_TYPE_FIRST_VARIABLE_TYPE: spv_operand_type_t =
spv_operand_type_t::SPV_OPERAND_TYPE_VARIABLE_ID;
}
impl spv_operand_type_t {
pub const SPV_OPERAND_TYPE_LAST_VARIABLE_TYPE: spv_operand_type_t =
spv_operand_type_t::SPV_OPERAND_TYPE_VARIABLE_ID_LITERAL_INTEGER;
}
impl spv_operand_type_t {
pub const SPV_OPERAND_TYPE_LAST_OPTIONAL_TYPE: spv_operand_type_t =
spv_operand_type_t::SPV_OPERAND_TYPE_VARIABLE_ID_LITERAL_INTEGER;
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_operand_type_t {
SPV_OPERAND_TYPE_NONE = 0,
SPV_OPERAND_TYPE_ID = 1,
SPV_OPERAND_TYPE_TYPE_ID = 2,
SPV_OPERAND_TYPE_RESULT_ID = 3,
SPV_OPERAND_TYPE_MEMORY_SEMANTICS_ID = 4,
SPV_OPERAND_TYPE_SCOPE_ID = 5,
SPV_OPERAND_TYPE_LITERAL_INTEGER = 6,
SPV_OPERAND_TYPE_EXTENSION_INSTRUCTION_NUMBER = 7,
SPV_OPERAND_TYPE_SPEC_CONSTANT_OP_NUMBER = 8,
SPV_OPERAND_TYPE_TYPED_LITERAL_NUMBER = 9,
SPV_OPERAND_TYPE_LITERAL_STRING = 10,
SPV_OPERAND_TYPE_SOURCE_LANGUAGE = 11,
SPV_OPERAND_TYPE_EXECUTION_MODEL = 12,
SPV_OPERAND_TYPE_ADDRESSING_MODEL = 13,
SPV_OPERAND_TYPE_MEMORY_MODEL = 14,
SPV_OPERAND_TYPE_EXECUTION_MODE = 15,
SPV_OPERAND_TYPE_STORAGE_CLASS = 16,
SPV_OPERAND_TYPE_DIMENSIONALITY = 17,
SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE = 18,
SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE = 19,
SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT = 20,
SPV_OPERAND_TYPE_IMAGE_CHANNEL_ORDER = 21,
SPV_OPERAND_TYPE_IMAGE_CHANNEL_DATA_TYPE = 22,
SPV_OPERAND_TYPE_FP_ROUNDING_MODE = 23,
SPV_OPERAND_TYPE_LINKAGE_TYPE = 24,
SPV_OPERAND_TYPE_ACCESS_QUALIFIER = 25,
SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE = 26,
SPV_OPERAND_TYPE_DECORATION = 27,
SPV_OPERAND_TYPE_BUILT_IN = 28,
SPV_OPERAND_TYPE_GROUP_OPERATION = 29,
SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS = 30,
SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO = 31,
SPV_OPERAND_TYPE_CAPABILITY = 32,
SPV_OPERAND_TYPE_RAY_FLAGS = 33,
SPV_OPERAND_TYPE_RAY_QUERY_INTERSECTION = 34,
SPV_OPERAND_TYPE_RAY_QUERY_COMMITTED_INTERSECTION_TYPE = 35,
SPV_OPERAND_TYPE_RAY_QUERY_CANDIDATE_INTERSECTION_TYPE = 36,
SPV_OPERAND_TYPE_IMAGE = 37,
SPV_OPERAND_TYPE_FP_FAST_MATH_MODE = 38,
SPV_OPERAND_TYPE_SELECTION_CONTROL = 39,
SPV_OPERAND_TYPE_LOOP_CONTROL = 40,
SPV_OPERAND_TYPE_FUNCTION_CONTROL = 41,
SPV_OPERAND_TYPE_MEMORY_ACCESS = 42,
SPV_OPERAND_TYPE_OPTIONAL_ID = 43,
SPV_OPERAND_TYPE_OPTIONAL_IMAGE = 44,
SPV_OPERAND_TYPE_OPTIONAL_MEMORY_ACCESS = 45,
SPV_OPERAND_TYPE_OPTIONAL_LITERAL_INTEGER = 46,
SPV_OPERAND_TYPE_OPTIONAL_LITERAL_NUMBER = 47,
SPV_OPERAND_TYPE_OPTIONAL_TYPED_LITERAL_INTEGER = 48,
SPV_OPERAND_TYPE_OPTIONAL_LITERAL_STRING = 49,
SPV_OPERAND_TYPE_OPTIONAL_ACCESS_QUALIFIER = 50,
SPV_OPERAND_TYPE_OPTIONAL_CIV = 51,
SPV_OPERAND_TYPE_VARIABLE_ID = 52,
SPV_OPERAND_TYPE_VARIABLE_LITERAL_INTEGER = 53,
SPV_OPERAND_TYPE_VARIABLE_LITERAL_INTEGER_ID = 54,
SPV_OPERAND_TYPE_VARIABLE_ID_LITERAL_INTEGER = 55,
SPV_OPERAND_TYPE_DEBUG_INFO_FLAGS = 56,
SPV_OPERAND_TYPE_DEBUG_BASE_TYPE_ATTRIBUTE_ENCODING = 57,
SPV_OPERAND_TYPE_DEBUG_COMPOSITE_TYPE = 58,
SPV_OPERAND_TYPE_DEBUG_TYPE_QUALIFIER = 59,
SPV_OPERAND_TYPE_DEBUG_OPERATION = 60,
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_INFO_FLAGS = 61,
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_BASE_TYPE_ATTRIBUTE_ENCODING = 62,
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_COMPOSITE_TYPE = 63,
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_TYPE_QUALIFIER = 64,
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_OPERATION = 65,
SPV_OPERAND_TYPE_CLDEBUG100_DEBUG_IMPORTED_ENTITY = 66,
SPV_OPERAND_TYPE_NUM_OPERAND_TYPES = 67,
_spv_operand_type_t = 2147483647,
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_ext_inst_type_t {
SPV_EXT_INST_TYPE_NONE = 0,
SPV_EXT_INST_TYPE_GLSL_STD_450 = 1,
SPV_EXT_INST_TYPE_OPENCL_STD = 2,
SPV_EXT_INST_TYPE_SPV_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER = 3,
SPV_EXT_INST_TYPE_SPV_AMD_SHADER_TRINARY_MINMAX = 4,
SPV_EXT_INST_TYPE_SPV_AMD_GCN_SHADER = 5,
SPV_EXT_INST_TYPE_SPV_AMD_SHADER_BALLOT = 6,
SPV_EXT_INST_TYPE_DEBUGINFO = 7,
SPV_EXT_INST_TYPE_OPENCL_DEBUGINFO_100 = 8,
SPV_EXT_INST_TYPE_NONSEMANTIC_UNKNOWN = 9,
_spv_ext_inst_type_t = 2147483647,
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_number_kind_t {
SPV_NUMBER_NONE = 0,
SPV_NUMBER_UNSIGNED_INT = 1,
SPV_NUMBER_SIGNED_INT = 2,
SPV_NUMBER_FLOATING = 3,
}
impl spv_text_to_binary_options_t {
pub const SPV_TEXT_TO_BINARY_OPTION_NONE: spv_text_to_binary_options_t =
spv_text_to_binary_options_t(1);
}
impl spv_text_to_binary_options_t {
pub const SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS: spv_text_to_binary_options_t =
spv_text_to_binary_options_t(2);
}
impl spv_text_to_binary_options_t {
pub const _spv_text_to_binary_options_t: spv_text_to_binary_options_t =
spv_text_to_binary_options_t(2147483647);
}
impl ::std::ops::BitOr<spv_text_to_binary_options_t> for spv_text_to_binary_options_t {
type Output = Self;
#[inline]
fn bitor(self, other: Self) -> Self {
spv_text_to_binary_options_t(self.0 | other.0)
}
}
impl ::std::ops::BitOrAssign for spv_text_to_binary_options_t {
#[inline]
fn bitor_assign(&mut self, rhs: spv_text_to_binary_options_t) {
self.0 |= rhs.0;
}
}
impl ::std::ops::BitAnd<spv_text_to_binary_options_t> for spv_text_to_binary_options_t {
type Output = Self;
#[inline]
fn bitand(self, other: Self) -> Self {
spv_text_to_binary_options_t(self.0 & other.0)
}
}
impl ::std::ops::BitAndAssign for spv_text_to_binary_options_t {
#[inline]
fn bitand_assign(&mut self, rhs: spv_text_to_binary_options_t) {
self.0 &= rhs.0;
}
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct spv_text_to_binary_options_t(pub u32);
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_NONE: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(1);
}
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_PRINT: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(2);
}
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_COLOR: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(4);
}
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_INDENT: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(8);
}
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_SHOW_BYTE_OFFSET: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(16);
}
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_NO_HEADER: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(32);
}
impl spv_binary_to_text_options_t {
pub const SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(64);
}
impl spv_binary_to_text_options_t {
pub const _spv_binary_to_text_options_t: spv_binary_to_text_options_t =
spv_binary_to_text_options_t(2147483647);
}
impl ::std::ops::BitOr<spv_binary_to_text_options_t> for spv_binary_to_text_options_t {
type Output = Self;
#[inline]
fn bitor(self, other: Self) -> Self {
spv_binary_to_text_options_t(self.0 | other.0)
}
}
impl ::std::ops::BitOrAssign for spv_binary_to_text_options_t {
#[inline]
fn bitor_assign(&mut self, rhs: spv_binary_to_text_options_t) {
self.0 |= rhs.0;
}
}
impl ::std::ops::BitAnd<spv_binary_to_text_options_t> for spv_binary_to_text_options_t {
type Output = Self;
#[inline]
fn bitand(self, other: Self) -> Self {
spv_binary_to_text_options_t(self.0 & other.0)
}
}
impl ::std::ops::BitAndAssign for spv_binary_to_text_options_t {
#[inline]
fn bitand_assign(&mut self, rhs: spv_binary_to_text_options_t) {
self.0 &= rhs.0;
}
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct spv_binary_to_text_options_t(pub u32);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_parsed_operand_t {
pub offset: u16,
pub num_words: u16,
pub type_: spv_operand_type_t,
pub number_kind: spv_number_kind_t,
pub number_bit_width: u32,
}
#[test]
fn bindgen_test_layout_spv_parsed_operand_t() {
assert_eq!(
::std::mem::size_of::<spv_parsed_operand_t>(),
16usize,
concat!("Size of: ", stringify!(spv_parsed_operand_t))
);
assert_eq!(
::std::mem::align_of::<spv_parsed_operand_t>(),
4usize,
concat!("Alignment of ", stringify!(spv_parsed_operand_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_parsed_operand_t>())).offset as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_operand_t),
"::",
stringify!(offset)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_parsed_operand_t>())).num_words as *const _ as usize },
2usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_operand_t),
"::",
stringify!(num_words)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_parsed_operand_t>())).type_ as *const _ as usize },
4usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_operand_t),
"::",
stringify!(type_)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_operand_t>())).number_kind as *const _ as usize
},
8usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_operand_t),
"::",
stringify!(number_kind)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_operand_t>())).number_bit_width as *const _ as usize
},
12usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_operand_t),
"::",
stringify!(number_bit_width)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_parsed_instruction_t {
pub words: *const u32,
pub num_words: u16,
pub opcode: u16,
pub ext_inst_type: spv_ext_inst_type_t,
pub type_id: u32,
pub result_id: u32,
pub operands: *const spv_parsed_operand_t,
pub num_operands: u16,
}
#[test]
fn bindgen_test_layout_spv_parsed_instruction_t() {
assert_eq!(
::std::mem::size_of::<spv_parsed_instruction_t>(),
40usize,
concat!("Size of: ", stringify!(spv_parsed_instruction_t))
);
assert_eq!(
::std::mem::align_of::<spv_parsed_instruction_t>(),
8usize,
concat!("Alignment of ", stringify!(spv_parsed_instruction_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_parsed_instruction_t>())).words as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(words)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).num_words as *const _ as usize
},
8usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(num_words)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_parsed_instruction_t>())).opcode as *const _ as usize },
10usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(opcode)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).ext_inst_type as *const _ as usize
},
12usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(ext_inst_type)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).type_id as *const _ as usize
},
16usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(type_id)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).result_id as *const _ as usize
},
20usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(result_id)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).operands as *const _ as usize
},
24usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(operands)
)
);
assert_eq!(
unsafe {
&(*(::std::ptr::null::<spv_parsed_instruction_t>())).num_operands as *const _ as usize
},
32usize,
concat!(
"Offset of field: ",
stringify!(spv_parsed_instruction_t),
"::",
stringify!(num_operands)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_const_binary_t {
pub code: *const u32,
pub wordCount: usize,
}
#[test]
fn bindgen_test_layout_spv_const_binary_t() {
assert_eq!(
::std::mem::size_of::<spv_const_binary_t>(),
16usize,
concat!("Size of: ", stringify!(spv_const_binary_t))
);
assert_eq!(
::std::mem::align_of::<spv_const_binary_t>(),
8usize,
concat!("Alignment of ", stringify!(spv_const_binary_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_const_binary_t>())).code as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_const_binary_t),
"::",
stringify!(code)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_const_binary_t>())).wordCount as *const _ as usize },
8usize,
concat!(
"Offset of field: ",
stringify!(spv_const_binary_t),
"::",
stringify!(wordCount)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_binary_t {
pub code: *mut u32,
pub wordCount: usize,
}
#[test]
fn bindgen_test_layout_spv_binary_t() {
assert_eq!(
::std::mem::size_of::<spv_binary_t>(),
16usize,
concat!("Size of: ", stringify!(spv_binary_t))
);
assert_eq!(
::std::mem::align_of::<spv_binary_t>(),
8usize,
concat!("Alignment of ", stringify!(spv_binary_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_binary_t>())).code as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_binary_t),
"::",
stringify!(code)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_binary_t>())).wordCount as *const _ as usize },
8usize,
concat!(
"Offset of field: ",
stringify!(spv_binary_t),
"::",
stringify!(wordCount)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_text_t {
pub str_: *const ::std::os::raw::c_char,
pub length: usize,
}
#[test]
fn bindgen_test_layout_spv_text_t() {
assert_eq!(
::std::mem::size_of::<spv_text_t>(),
16usize,
concat!("Size of: ", stringify!(spv_text_t))
);
assert_eq!(
::std::mem::align_of::<spv_text_t>(),
8usize,
concat!("Alignment of ", stringify!(spv_text_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_text_t>())).str_ as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_text_t),
"::",
stringify!(str_)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_text_t>())).length as *const _ as usize },
8usize,
concat!(
"Offset of field: ",
stringify!(spv_text_t),
"::",
stringify!(length)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_position_t {
pub line: usize,
pub column: usize,
pub index: usize,
}
#[test]
fn bindgen_test_layout_spv_position_t() {
assert_eq!(
::std::mem::size_of::<spv_position_t>(),
24usize,
concat!("Size of: ", stringify!(spv_position_t))
);
assert_eq!(
::std::mem::align_of::<spv_position_t>(),
8usize,
concat!("Alignment of ", stringify!(spv_position_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_position_t>())).line as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_position_t),
"::",
stringify!(line)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_position_t>())).column as *const _ as usize },
8usize,
concat!(
"Offset of field: ",
stringify!(spv_position_t),
"::",
stringify!(column)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_position_t>())).index as *const _ as usize },
16usize,
concat!(
"Offset of field: ",
stringify!(spv_position_t),
"::",
stringify!(index)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_diagnostic_t {
pub position: spv_position_t,
pub error: *mut ::std::os::raw::c_char,
pub isTextSource: bool,
}
#[test]
fn bindgen_test_layout_spv_diagnostic_t() {
assert_eq!(
::std::mem::size_of::<spv_diagnostic_t>(),
40usize,
concat!("Size of: ", stringify!(spv_diagnostic_t))
);
assert_eq!(
::std::mem::align_of::<spv_diagnostic_t>(),
8usize,
concat!("Alignment of ", stringify!(spv_diagnostic_t))
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_diagnostic_t>())).position as *const _ as usize },
0usize,
concat!(
"Offset of field: ",
stringify!(spv_diagnostic_t),
"::",
stringify!(position)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_diagnostic_t>())).error as *const _ as usize },
24usize,
concat!(
"Offset of field: ",
stringify!(spv_diagnostic_t),
"::",
stringify!(error)
)
);
assert_eq!(
unsafe { &(*(::std::ptr::null::<spv_diagnostic_t>())).isTextSource as *const _ as usize },
32usize,
concat!(
"Offset of field: ",
stringify!(spv_diagnostic_t),
"::",
stringify!(isTextSource)
)
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_context_t {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_validator_options_t {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_optimizer_options_t {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_reducer_options_t {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct spv_fuzzer_options_t {
_unused: [u8; 0],
}
pub type spv_const_binary = *mut spv_const_binary_t;
pub type spv_binary = *mut spv_binary_t;
pub type spv_text = *mut spv_text_t;
pub type spv_position = *mut spv_position_t;
pub type spv_diagnostic = *mut spv_diagnostic_t;
pub type spv_const_context = *const spv_context_t;
pub type spv_context = *mut spv_context_t;
pub type spv_validator_options = *mut spv_validator_options_t;
pub type spv_const_validator_options = *const spv_validator_options_t;
pub type spv_optimizer_options = *mut spv_optimizer_options_t;
pub type spv_const_optimizer_options = *const spv_optimizer_options_t;
pub type spv_reducer_options = *mut spv_reducer_options_t;
pub type spv_const_reducer_options = *const spv_reducer_options_t;
pub type spv_fuzzer_options = *mut spv_fuzzer_options_t;
pub type spv_const_fuzzer_options = *const spv_fuzzer_options_t;
extern "C" {
pub fn spvSoftwareVersionString() -> *const ::std::os::raw::c_char;
}
extern "C" {
pub fn spvSoftwareVersionDetailsString() -> *const ::std::os::raw::c_char;
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_target_env {
SPV_ENV_UNIVERSAL_1_0 = 0,
SPV_ENV_VULKAN_1_0 = 1,
SPV_ENV_UNIVERSAL_1_1 = 2,
SPV_ENV_OPENCL_2_1 = 3,
SPV_ENV_OPENCL_2_2 = 4,
SPV_ENV_OPENGL_4_0 = 5,
SPV_ENV_OPENGL_4_1 = 6,
SPV_ENV_OPENGL_4_2 = 7,
SPV_ENV_OPENGL_4_3 = 8,
SPV_ENV_OPENGL_4_5 = 9,
SPV_ENV_UNIVERSAL_1_2 = 10,
SPV_ENV_OPENCL_1_2 = 11,
SPV_ENV_OPENCL_EMBEDDED_1_2 = 12,
SPV_ENV_OPENCL_2_0 = 13,
SPV_ENV_OPENCL_EMBEDDED_2_0 = 14,
SPV_ENV_OPENCL_EMBEDDED_2_1 = 15,
SPV_ENV_OPENCL_EMBEDDED_2_2 = 16,
SPV_ENV_UNIVERSAL_1_3 = 17,
SPV_ENV_VULKAN_1_1 = 18,
SPV_ENV_WEBGPU_0 = 19,
SPV_ENV_UNIVERSAL_1_4 = 20,
SPV_ENV_VULKAN_1_1_SPIRV_1_4 = 21,
SPV_ENV_UNIVERSAL_1_5 = 22,
SPV_ENV_VULKAN_1_2 = 23,
}
#[repr(u32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum spv_validator_limit {
spv_validator_limit_max_struct_members = 0,
spv_validator_limit_max_struct_depth = 1,
spv_validator_limit_max_local_variables = 2,
spv_validator_limit_max_global_variables = 3,
spv_validator_limit_max_switch_branches = 4,
spv_validator_limit_max_function_args = 5,
spv_validator_limit_max_control_flow_nesting_depth = 6,
spv_validator_limit_max_access_chain_indexes = 7,
spv_validator_limit_max_id_bound = 8,
}
extern "C" {
pub fn spvTargetEnvDescription(env: spv_target_env) -> *const ::std::os::raw::c_char;
}
extern "C" {
pub fn spvParseTargetEnv(s: *const ::std::os::raw::c_char, env: *mut spv_target_env) -> bool;
}
extern "C" {
pub fn spvParseVulkanEnv(vulkan_ver: u32, spirv_ver: u32, env: *mut spv_target_env) -> bool;
}
extern "C" {
pub fn spvContextCreate(env: spv_target_env) -> spv_context;
}
extern "C" {
pub fn spvContextDestroy(context: spv_context);
}
extern "C" {
pub fn spvValidatorOptionsCreate() -> spv_validator_options;
}
extern "C" {
pub fn spvValidatorOptionsDestroy(options: spv_validator_options);
}
extern "C" {
pub fn spvValidatorOptionsSetUniversalLimit(
options: spv_validator_options,
limit_type: spv_validator_limit,
limit: u32,
);
}
extern "C" {
pub fn spvValidatorOptionsSetRelaxStoreStruct(options: spv_validator_options, val: bool);
}
extern "C" {
pub fn spvValidatorOptionsSetRelaxLogicalPointer(options: spv_validator_options, val: bool);
}
extern "C" {
pub fn spvValidatorOptionsSetBeforeHlslLegalization(options: spv_validator_options, val: bool);
}
extern "C" {
pub fn spvValidatorOptionsSetRelaxBlockLayout(options: spv_validator_options, val: bool);
}
extern "C" {
pub fn spvValidatorOptionsSetUniformBufferStandardLayout(
options: spv_validator_options,
val: bool,
);
}
extern "C" {
pub fn spvValidatorOptionsSetScalarBlockLayout(options: spv_validator_options, val: bool);
}
extern "C" {
pub fn spvValidatorOptionsSetSkipBlockLayout(options: spv_validator_options, val: bool);
}
extern "C" {
pub fn spvOptimizerOptionsCreate() -> spv_optimizer_options;
}
extern "C" {
pub fn spvOptimizerOptionsDestroy(options: spv_optimizer_options);
}
extern "C" {
pub fn spvOptimizerOptionsSetRunValidator(options: spv_optimizer_options, val: bool);
}
extern "C" {
pub fn spvOptimizerOptionsSetValidatorOptions(
options: spv_optimizer_options,
val: spv_validator_options,
);
}
extern "C" {
pub fn spvOptimizerOptionsSetMaxIdBound(options: spv_optimizer_options, val: u32);
}
extern "C" {
pub fn spvOptimizerOptionsSetPreserveBindings(options: spv_optimizer_options, val: bool);
}
extern "C" {
pub fn spvOptimizerOptionsSetPreserveSpecConstants(options: spv_optimizer_options, val: bool);
}
extern "C" {
pub fn spvReducerOptionsCreate() -> spv_reducer_options;
}
extern "C" {
pub fn spvReducerOptionsDestroy(options: spv_reducer_options);
}
extern "C" {
pub fn spvReducerOptionsSetStepLimit(options: spv_reducer_options, step_limit: u32);
}
extern "C" {
pub fn spvReducerOptionsSetFailOnValidationError(
options: spv_reducer_options,
fail_on_validation_error: bool,
);
}
extern "C" {
pub fn spvFuzzerOptionsCreate() -> spv_fuzzer_options;
}
extern "C" {
pub fn spvFuzzerOptionsDestroy(options: spv_fuzzer_options);
}
extern "C" {
pub fn spvFuzzerOptionsEnableReplayValidation(options: spv_fuzzer_options);
}
extern "C" {
pub fn spvFuzzerOptionsSetRandomSeed(options: spv_fuzzer_options, seed: u32);
}
extern "C" {
pub fn spvFuzzerOptionsSetShrinkerStepLimit(
options: spv_fuzzer_options,
shrinker_step_limit: u32,
);
}
extern "C" {
pub fn spvFuzzerOptionsEnableFuzzerPassValidation(options: spv_fuzzer_options);
}
extern "C" {
pub fn spvTextToBinary(
context: spv_const_context,
text: *const ::std::os::raw::c_char,
length: usize,
binary: *mut spv_binary,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}
extern "C" {
pub fn spvTextToBinaryWithOptions(
context: spv_const_context,
text: *const ::std::os::raw::c_char,
length: usize,
options: u32,
binary: *mut spv_binary,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}
extern "C" {
pub fn spvTextDestroy(text: spv_text);
}
extern "C" {
pub fn spvBinaryToText(
context: spv_const_context,
binary: *const u32,
word_count: usize,
options: u32,
text: *mut spv_text,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}
extern "C" {
pub fn spvBinaryDestroy(binary: spv_binary);
}
extern "C" {
pub fn spvValidate(
context: spv_const_context,
binary: spv_const_binary,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}
extern "C" {
pub fn spvValidateWithOptions(
context: spv_const_context,
options: spv_const_validator_options,
binary: spv_const_binary,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}
extern "C" {
pub fn spvValidateBinary(
context: spv_const_context,
words: *const u32,
num_words: usize,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}
extern "C" {
pub fn spvDiagnosticCreate(
position: spv_position,
message: *const ::std::os::raw::c_char,
) -> spv_diagnostic;
}
extern "C" {
pub fn spvDiagnosticDestroy(diagnostic: spv_diagnostic);
}
extern "C" {
pub fn spvDiagnosticPrint(diagnostic: spv_diagnostic) -> spv_result_t;
}
extern "C" {
pub fn spvOpcodeString(opcode: u32) -> *const ::std::os::raw::c_char;
}
pub type spv_parsed_header_fn_t = ::std::option::Option<
unsafe extern "C" fn(
user_data: *mut ::std::os::raw::c_void,
endian: spv_endianness_t,
magic: u32,
version: u32,
generator: u32,
id_bound: u32,
reserved: u32,
) -> spv_result_t,
>;
pub type spv_parsed_instruction_fn_t = ::std::option::Option<
unsafe extern "C" fn(
user_data: *mut ::std::os::raw::c_void,
parsed_instruction: *const spv_parsed_instruction_t,
) -> spv_result_t,
>;
extern "C" {
pub fn spvBinaryParse(
context: spv_const_context,
user_data: *mut ::std::os::raw::c_void,
words: *const u32,
num_words: usize,
parse_header: spv_parsed_header_fn_t,
parse_instruction: spv_parsed_instruction_fn_t,
diagnostic: *mut spv_diagnostic,
) -> spv_result_t;
}

View File

@ -2,26 +2,25 @@
name = "zluda"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[lib]
name = "zluda"
name = "nvcuda"
crate-type = ["cdylib"]
[dependencies]
comgr = { path = "../comgr" }
ptx_parser = { path = "../ptx_parser" }
ptx = { path = "../ptx" }
cuda_types = { path = "../cuda_types" }
cuda_base = { path = "../cuda_base" }
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
lazy_static = "1.4"
num_enum = "0.4"
lz4-sys = "1.9"
tempfile = "3"
paste = "1.0"
[dependencies.ocl-core]
version = "0.11"
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]
rustc-hash = "1.1"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["heapapi", "std"] }
[dev-dependencies]
cuda-driver-sys = "0.3.0"

View File

@ -1,3 +0,0 @@
bindgen /usr/local/cuda/include/cuda.h -o cuda.rs --whitelist-function="^cu.*" --size_t-is-usize --default-enum-style=newtype --no-layout-tests --no-doc-comments --no-derive-debug --new-type-alias "^CUdevice$|^CUdeviceptr$"
sed -i -e 's/extern "C" {//g' -e 's/-> CUresult;/-> CUresult { impl_::unsupported()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' cuda.rs
rustfmt cuda.rs

View File

@ -1,20 +0,0 @@
use env::VarError;
use std::{env, path::PathBuf};
// HACK ALERT
// This is a temporary hack to to make sure that linker does not pick up
// NVIDIA OpenCL .lib using paths injected by cl-sys
fn main() -> Result<(), VarError> {
if cfg!(windows) {
let env = env::var("CARGO_CFG_TARGET_ENV")?;
if env == "msvc" {
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
path.push("lib");
println!("cargo:rustc-link-search=native={}", path.display());
} else {
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
};
}
Ok(())
}

File diff suppressed because it is too large Load Diff

View File

@ -1,24 +1,93 @@
use std::ptr;
use super::{driver, FromCuda, ZludaObject};
use cuda_types::*;
use hip_runtime_sys::*;
use rustc_hash::FxHashSet;
use std::{cell::RefCell, ptr, sync::Mutex};
use crate::cuda::CUlimit;
use crate::cuda::CUresult;
thread_local! {
pub(crate) static CONTEXT_STACK: RefCell<Vec<(CUcontext, hipDevice_t)>> = RefCell::new(Vec::new());
}
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
if pvalue == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
*pvalue = 512; // GTX 1060 reports 1024
CUresult::CUDA_SUCCESS
} else {
CUresult::CUDA_ERROR_NOT_SUPPORTED
pub(crate) struct Context {
pub(crate) device: hipDevice_t,
pub(crate) mutable: Mutex<OwnedByContext>,
}
pub(crate) struct OwnedByContext {
pub(crate) ref_count: usize, // only used by primary context
pub(crate) _memory: FxHashSet<hipDeviceptr_t>,
pub(crate) _streams: FxHashSet<hipStream_t>,
pub(crate) _modules: FxHashSet<CUmodule>,
}
impl ZludaObject for Context {
const COOKIE: usize = 0x5f867c6d9cb73315;
type CudaHandle = CUcontext;
fn drop_checked(&mut self) -> CUresult {
Ok(())
}
}
pub(crate) fn set_limit(limit: CUlimit, value: usize) -> CUresult {
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
CUresult::CUDA_SUCCESS
} else {
CUresult::CUDA_ERROR_NOT_SUPPORTED
pub(crate) fn new(device: hipDevice_t) -> Context {
Context {
device,
mutable: Mutex::new(OwnedByContext {
ref_count: 0,
_memory: FxHashSet::default(),
_streams: FxHashSet::default(),
_modules: FxHashSet::default(),
}),
}
}
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: hipLimit_t) -> hipError_t {
unsafe { hipDeviceGetLimit(pvalue, limit) }
}
pub(crate) fn set_limit(limit: hipLimit_t, value: usize) -> hipError_t {
unsafe { hipDeviceSetLimit(limit, value) }
}
pub(crate) fn synchronize() -> hipError_t {
unsafe { hipDeviceSynchronize() }
}
pub(crate) fn get_primary(hip_dev: hipDevice_t) -> Result<(&'static Context, CUcontext), CUerror> {
let dev = driver::device(hip_dev)?;
Ok(dev.primary_context())
}
pub(crate) fn set_current(raw_ctx: CUcontext) -> CUresult {
let new_device = if raw_ctx.0 == ptr::null_mut() {
CONTEXT_STACK.with(|stack| {
let mut stack = stack.borrow_mut();
if let Some((_, old_device)) = stack.pop() {
if let Some((_, new_device)) = stack.last() {
if old_device != *new_device {
return Some(*new_device);
}
}
}
None
})
} else {
let ctx: &Context = FromCuda::from_cuda(&raw_ctx)?;
let device = ctx.device;
CONTEXT_STACK.with(move |stack| {
let mut stack = stack.borrow_mut();
let last_device = stack.last().map(|(_, dev)| *dev);
stack.push((raw_ctx, device));
match last_device {
None => Some(device),
Some(last_device) if last_device != device => Some(device),
_ => None,
}
})
};
if let Some(dev) = new_device {
unsafe { hipSetDevice(dev)? };
}
Ok(())
}

View File

@ -1,29 +1,27 @@
use super::{transmute_lifetime, transmute_lifetime_mut, CUresult};
use crate::{
cuda::{self, CUdevice, CUdevprop},
hip_call,
};
use cuda::{CUdevice_attribute, CUuuid_st};
use hip_runtime_sys::{
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
};
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
use paste::paste;
use std::{
cmp,
collections::HashSet,
ffi::c_void,
mem,
os::raw::{c_char, c_int, c_uint},
ptr,
sync::atomic::{AtomicU32, Ordering},
};
use cuda_types::*;
use hip_runtime_sys::*;
use std::{mem, ptr};
const PROJECT_URL_SUFFIX_SHORT: &'static str = " [ZLUDA]";
const PROJECT_URL_SUFFIX_LONG: &'static str = " [github.com/vosen/ZLUDA]";
use super::context;
const PROJECT_SUFFIX: &[u8] = b" [ZLUDA]\0";
pub const COMPUTE_CAPABILITY_MAJOR: i32 = 8;
pub const COMPUTE_CAPABILITY_MINOR: i32 = 8;
pub(crate) fn compute_capability(major: &mut i32, minor: &mut i32, _dev: hipDevice_t) -> CUresult {
*major = COMPUTE_CAPABILITY_MAJOR;
*minor = COMPUTE_CAPABILITY_MINOR;
Ok(())
}
pub(crate) fn get(device: *mut hipDevice_t, ordinal: i32) -> hipError_t {
unsafe { hipDeviceGet(device, ordinal) }
}
#[allow(warnings)]
trait hipDeviceAttribute_t_ext {
trait DeviceAttributeNames {
const hipDeviceAttributeGpuOverlap: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributeDeviceOverlap;
const hipDeviceAttributeMaximumTexture1DWidth: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth;
const hipDeviceAttributeMaximumTexture2DWidth: hipDeviceAttribute_t =
@ -42,307 +40,300 @@ trait hipDeviceAttribute_t_ext {
hipDeviceAttribute_t::hipDeviceAttributeMaxThreadsPerMultiProcessor;
const hipDeviceAttributeAsyncEngineCount: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributeConcurrentKernels;
const hipDeviceAttributePciDomainId: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributePciDomainID;
const hipDeviceAttributeMultiGpuBoard: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributeIsMultiGpuBoard;
const hipDeviceAttributeMultiGpuBoardGroupId: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributeMultiGpuBoardGroupID;
const hipDeviceAttributeMaxSharedMemoryPerBlockOptin: hipDeviceAttribute_t =
hipDeviceAttribute_t::hipDeviceAttributeSharedMemPerBlockOptin;
}
impl hipDeviceAttribute_t_ext for hipDeviceAttribute_t {}
impl DeviceAttributeNames for hipDeviceAttribute_t {}
macro_rules! remap_attribute {
($attrib:expr => $([ $($word:expr)* ]),*,) => {
match $attrib {
$(
paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
paste::paste! { CUdevice_attribute:: [< CU_DEVICE_ATTRIBUTE $(_ $word:upper)* >] } => {
paste::paste! { hipDeviceAttribute_t:: [< hipDeviceAttribute $($word:camel)* >] }
}
)*
_ => return hipError_t::hipErrorInvalidValue
_ => return Err(hipErrorCode_t::NotSupported)
}
}
}
pub fn get_attribute(pi: *mut i32, attrib: CUdevice_attribute, dev_idx: c_int) -> hipError_t {
if pi == ptr::null_mut() {
return hipError_t::hipErrorInvalidValue;
}
//let mut props = unsafe { mem::zeroed() };
let hip_attrib = match attrib {
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT => {
unsafe { *pi = 1 };
return hipError_t::hipSuccess;
pub(crate) fn get_attribute(
pi: &mut i32,
attrib: CUdevice_attribute,
dev_idx: hipDevice_t,
) -> hipError_t {
match attrib {
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_WARP_SIZE => {
*pi = 32;
return Ok(());
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED => {
unsafe { *pi = 1 };
return hipError_t::hipSuccess;
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID => {
unsafe { *pi = 0 };
return hipError_t::hipSuccess;
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_TCC_DRIVER => {
*pi = 0;
return Ok(());
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR => {
unsafe { *pi = 8 };
return hipError_t::hipSuccess;
*pi = COMPUTE_CAPABILITY_MAJOR;
return Ok(());
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR => {
unsafe { *pi = 0 };
return hipError_t::hipSuccess;
*pi = COMPUTE_CAPABILITY_MINOR;
return Ok(());
}
_ => {}
}
let attrib = remap_attribute! {
attrib =>
[MAX THREADS PER BLOCK],
[MAX BLOCK DIM X],
[MAX BLOCK DIM Y],
[MAX BLOCK DIM Z],
[MAX GRID DIM X],
[MAX GRID DIM Y],
[MAX GRID DIM Z],
[MAX SHARED MEMORY PER BLOCK],
[TOTAL CONSTANT MEMORY],
//[WARP SIZE],
[MAX PITCH],
[MAX REGISTERS PER BLOCK],
[CLOCK RATE],
[TEXTURE ALIGNMENT],
[GPU OVERLAP],
[MULTIPROCESSOR COUNT],
[KERNEL EXEC TIMEOUT],
[INTEGRATED],
[CAN MAP HOST MEMORY],
[COMPUTE MODE],
[MAXIMUM TEXTURE1D WIDTH],
[MAXIMUM TEXTURE2D WIDTH],
[MAXIMUM TEXTURE2D HEIGHT],
[MAXIMUM TEXTURE3D WIDTH],
[MAXIMUM TEXTURE3D HEIGHT],
[MAXIMUM TEXTURE3D DEPTH],
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
[SURFACE ALIGNMENT],
[CONCURRENT KERNELS],
[ECC ENABLED],
[PCI BUS ID],
[PCI DEVICE ID],
//[TCC DRIVER],
[MEMORY CLOCK RATE],
[GLOBAL MEMORY BUS WIDTH],
[L2 CACHE SIZE],
[MAX THREADS PER MULTIPROCESSOR],
[ASYNC ENGINE COUNT],
[UNIFIED ADDRESSING],
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
//[CAN TEX2D GATHER],
//[MAXIMUM TEXTURE2D GATHER WIDTH],
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
[PCI DOMAIN ID],
[TEXTURE PITCH ALIGNMENT],
//[MAXIMUM TEXTURECUBEMAP WIDTH],
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
//[MAXIMUM SURFACE1D WIDTH],
//[MAXIMUM SURFACE2D WIDTH],
//[MAXIMUM SURFACE2D HEIGHT],
//[MAXIMUM SURFACE3D WIDTH],
//[MAXIMUM SURFACE3D HEIGHT],
//[MAXIMUM SURFACE3D DEPTH],
//[MAXIMUM SURFACE1D LAYERED WIDTH],
//[MAXIMUM SURFACE1D LAYERED LAYERS],
//[MAXIMUM SURFACE2D LAYERED WIDTH],
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
//[MAXIMUM SURFACE2D LAYERED LAYERS],
//[MAXIMUM SURFACECUBEMAP WIDTH],
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
//[MAXIMUM TEXTURE2D LINEAR PITCH],
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
//[COMPUTE CAPABILITY MAJOR],
//[COMPUTE CAPABILITY MINOR],
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
[STREAM PRIORITIES SUPPORTED],
[GLOBAL L1 CACHE SUPPORTED],
[LOCAL L1 CACHE SUPPORTED],
[MAX SHARED MEMORY PER MULTIPROCESSOR],
[MAX REGISTERS PER MULTIPROCESSOR],
[MANAGED MEMORY],
[MULTI GPU BOARD],
[MULTI GPU BOARD GROUP ID],
[HOST NATIVE ATOMIC SUPPORTED],
[SINGLE TO DOUBLE PRECISION PERF RATIO],
[PAGEABLE MEMORY ACCESS],
[CONCURRENT MANAGED ACCESS],
[COMPUTE PREEMPTION SUPPORTED],
[CAN USE HOST POINTER FOR REGISTERED MEM],
//[CAN USE STREAM MEM OPS],
[COOPERATIVE LAUNCH],
[COOPERATIVE MULTI DEVICE LAUNCH],
[MAX SHARED MEMORY PER BLOCK OPTIN],
//[CAN FLUSH REMOTE WRITES],
[HOST REGISTER SUPPORTED],
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
[DIRECT MANAGED MEM ACCESS FROM HOST],
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
//[MAX BLOCKS PER MULTIPROCESSOR],
//[GENERIC COMPRESSION SUPPORTED],
//[MAX PERSISTING L2 CACHE SIZE],
//[MAX ACCESS POLICY WINDOW SIZE],
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
//[RESERVED SHARED MEMORY PER BLOCK],
//[SPARSE CUDA ARRAY SUPPORTED],
//[READ ONLY HOST REGISTER SUPPORTED],
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
[MEMORY POOLS SUPPORTED],
//[GPU DIRECT RDMA SUPPORTED],
//[GPU DIRECT RDMA FLUSH WRITES OPTIONS],
//[GPU DIRECT RDMA WRITES ORDERING],
//[MEMPOOL SUPPORTED HANDLE TYPES],
//[CLUSTER LAUNCH],
//[DEFERRED MAPPING CUDA ARRAY SUPPORTED],
//[CAN USE 64 BIT STREAM MEM OPS],
//[CAN USE STREAM WAIT VALUE NOR],
//[DMA BUF SUPPORTED],
//[IPC EVENT SUPPORTED],
//[MEM SYNC DOMAIN COUNT],
//[TENSOR MAP ACCESS SUPPORTED],
//[HANDLE TYPE FABRIC SUPPORTED],
//[UNIFIED FUNCTION POINTERS],
//[NUMA CONFIG],
//[NUMA ID],
//[MULTICAST SUPPORTED],
//[MPS ENABLED],
//[HOST NUMA ID],
};
unsafe { hipDeviceGetAttribute(pi, attrib, dev_idx) }
}
pub(crate) fn get_uuid(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
unsafe { hipDeviceGetUuid(uuid, device) }
}
pub(crate) fn get_uuid_v2(uuid: *mut hipUUID, device: hipDevice_t) -> hipError_t {
get_uuid(uuid, device)
}
pub(crate) fn get_luid(
luid: *mut ::core::ffi::c_char,
device_node_mask: &mut ::core::ffi::c_uint,
dev: hipDevice_t,
) -> hipError_t {
let luid = unsafe {
luid.cast::<[i8; 8]>()
.as_mut()
.ok_or(hipErrorCode_t::InvalidValue)
}?;
let mut properties = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut properties, dev) }?;
*luid = properties.luid;
*device_node_mask = properties.luidDeviceNodeMask;
Ok(())
}
pub(crate) fn get_name(
name: *mut ::core::ffi::c_char,
len: ::core::ffi::c_int,
dev: hipDevice_t,
) -> cuda_types::CUresult {
unsafe { hipDeviceGetName(name, len, dev) }?;
let len = len as usize;
let buffer = unsafe { std::slice::from_raw_parts(name, len) };
let first_zero = buffer.iter().position(|c| *c == 0);
let first_zero = if let Some(x) = first_zero {
x
} else {
return Ok(());
};
if (first_zero + PROJECT_SUFFIX.len()) > len {
return Ok(());
}
unsafe {
ptr::copy_nonoverlapping(
PROJECT_SUFFIX.as_ptr() as _,
name.add(first_zero),
PROJECT_SUFFIX.len(),
)
};
Ok(())
}
pub(crate) fn total_mem_v2(bytes: *mut usize, dev: hipDevice_t) -> hipError_t {
unsafe { hipDeviceTotalMem(bytes, dev) }
}
pub(crate) fn get_properties(prop: &mut cuda_types::CUdevprop, dev: hipDevice_t) -> hipError_t {
let mut hip_props = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut hip_props, dev) }?;
prop.maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
prop.maxThreadsDim = hip_props.maxThreadsDim;
prop.maxGridSize = hip_props.maxGridSize;
prop.totalConstantMemory = clamp_usize(hip_props.totalConstMem);
prop.SIMDWidth = 32;
prop.memPitch = clamp_usize(hip_props.memPitch);
prop.regsPerBlock = hip_props.regsPerBlock;
prop.clockRate = hip_props.clockRate;
prop.textureAlign = clamp_usize(hip_props.textureAlignment);
Ok(())
}
pub(crate) fn get_count(count: &mut ::core::ffi::c_int) -> hipError_t {
unsafe { hipGetDeviceCount(count) }
}
fn clamp_usize(x: usize) -> i32 {
usize::min(x, i32::MAX as usize) as i32
}
pub(crate) fn primary_context_retain(
pctx: &mut CUcontext,
hip_dev: hipDevice_t,
) -> Result<(), CUerror> {
let (ctx, raw_ctx) = context::get_primary(hip_dev)?;
{
let mut mutable_ctx = ctx.mutable.lock().map_err(|_| CUerror::UNKNOWN)?;
mutable_ctx.ref_count += 1;
}
*pctx = raw_ctx;
Ok(())
}
pub(crate) fn primary_context_release(hip_dev: hipDevice_t) -> Result<(), CUerror> {
let (ctx, _) = context::get_primary(hip_dev)?;
{
let mut mutable_ctx = ctx.mutable.lock().map_err(|_| CUerror::UNKNOWN)?;
if mutable_ctx.ref_count == 0 {
return Err(CUerror::INVALID_CONTEXT);
}
mutable_ctx.ref_count -= 1;
if mutable_ctx.ref_count == 0 {
// TODO: drop all children
}
// we assume that arrayed texts have the same limits
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
}
// we treat surface the same as texture
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT => {
hipDeviceAttribute_t::hipDeviceAttributeTextureAlignment
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DHeight
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture3DDepth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DWidth
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture2DHeight
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH => {
hipDeviceAttribute_t::hipDeviceAttributeMaxTexture1DWidth
}
// Totally made up
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
| CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS => {
unsafe { *pi = u16::MAX as i32 };
return hipError_t::hipSuccess;
}
// linear sizes
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH => {
let mut prop = unsafe { mem::zeroed() };
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
if err != hipError_t::hipSuccess {
return err;
}
unsafe { *pi = prop.maxTexture1DLinear };
return hipError_t::hipSuccess;
}
CUdevice_attribute::CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID => {
let mut prop = unsafe { mem::zeroed() };
let err = unsafe { hipGetDeviceProperties(&mut prop, dev_idx) };
if err != hipError_t::hipSuccess {
return err;
}
unsafe { *pi = prop.pciDomainID };
return hipError_t::hipSuccess;
}
attrib => remap_attribute! {
attrib =>
[MAX THREADS PER BLOCK],
[MAX BLOCK DIM X],
[MAX BLOCK DIM Y],
[MAX BLOCK DIM Z],
[MAX GRID DIM X],
[MAX GRID DIM Y],
[MAX GRID DIM Z],
[MAX SHARED MEMORY PER BLOCK],
[TOTAL CONSTANT MEMORY],
[WARP SIZE],
[MAX PITCH],
[MAX REGISTERS PER BLOCK],
[CLOCK RATE],
[TEXTURE ALIGNMENT],
//[GPU OVERLAP],
[MULTIPROCESSOR COUNT],
[KERNEL EXEC TIMEOUT],
[INTEGRATED],
[CAN MAP HOST MEMORY],
[COMPUTE MODE],
[MAXIMUM TEXTURE1D WIDTH],
[MAXIMUM TEXTURE2D WIDTH],
[MAXIMUM TEXTURE2D HEIGHT],
[MAXIMUM TEXTURE3D WIDTH],
[MAXIMUM TEXTURE3D HEIGHT],
[MAXIMUM TEXTURE3D DEPTH],
//[MAXIMUM TEXTURE2D LAYERED WIDTH],
//[MAXIMUM TEXTURE2D LAYERED HEIGHT],
//[MAXIMUM TEXTURE2D LAYERED LAYERS],
//[MAXIMUM TEXTURE2D ARRAY WIDTH],
//[MAXIMUM TEXTURE2D ARRAY HEIGHT],
//[MAXIMUM TEXTURE2D ARRAY NUMSLICES],
//[SURFACE ALIGNMENT],
[CONCURRENT KERNELS],
[ECC ENABLED],
[PCI BUS ID],
[PCI DEVICE ID],
//[TCC DRIVER],
[MEMORY CLOCK RATE],
[GLOBAL MEMORY BUS WIDTH],
[L2 CACHE SIZE],
[MAX THREADS PER MULTIPROCESSOR],
[ASYNC ENGINE COUNT],
//[UNIFIED ADDRESSING],
//[MAXIMUM TEXTURE1D LAYERED WIDTH],
//[MAXIMUM TEXTURE1D LAYERED LAYERS],
//[CAN TEX2D GATHER],
//[MAXIMUM TEXTURE2D GATHER WIDTH],
//[MAXIMUM TEXTURE2D GATHER HEIGHT],
//[MAXIMUM TEXTURE3D WIDTH ALTERNATE],
//[MAXIMUM TEXTURE3D HEIGHT ALTERNATE],
//[MAXIMUM TEXTURE3D DEPTH ALTERNATE],
//[PCI DOMAIN ID],
[TEXTURE PITCH ALIGNMENT],
//[MAXIMUM TEXTURECUBEMAP WIDTH],
//[MAXIMUM TEXTURECUBEMAP LAYERED WIDTH],
//[MAXIMUM TEXTURECUBEMAP LAYERED LAYERS],
//[MAXIMUM SURFACE1D WIDTH],
//[MAXIMUM SURFACE2D WIDTH],
//[MAXIMUM SURFACE2D HEIGHT],
//[MAXIMUM SURFACE3D WIDTH],
//[MAXIMUM SURFACE3D HEIGHT],
//[MAXIMUM SURFACE3D DEPTH],
//[MAXIMUM SURFACE1D LAYERED WIDTH],
//[MAXIMUM SURFACE1D LAYERED LAYERS],
//[MAXIMUM SURFACE2D LAYERED WIDTH],
//[MAXIMUM SURFACE2D LAYERED HEIGHT],
//[MAXIMUM SURFACE2D LAYERED LAYERS],
//[MAXIMUM SURFACECUBEMAP WIDTH],
//[MAXIMUM SURFACECUBEMAP LAYERED WIDTH],
//[MAXIMUM SURFACECUBEMAP LAYERED LAYERS],
//[MAXIMUM TEXTURE1D LINEAR WIDTH],
//[MAXIMUM TEXTURE2D LINEAR WIDTH],
//[MAXIMUM TEXTURE2D LINEAR HEIGHT],
//[MAXIMUM TEXTURE2D LINEAR PITCH],
//[MAXIMUM TEXTURE2D MIPMAPPED WIDTH],
//[MAXIMUM TEXTURE2D MIPMAPPED HEIGHT],
//[COMPUTE CAPABILITY MAJOR],
//[COMPUTE CAPABILITY MINOR],
//[MAXIMUM TEXTURE1D MIPMAPPED WIDTH],
//[STREAM PRIORITIES SUPPORTED],
//[GLOBAL L1 CACHE SUPPORTED],
//[LOCAL L1 CACHE SUPPORTED],
[MAX SHARED MEMORY PER MULTIPROCESSOR],
//[MAX REGISTERS PER MULTIPROCESSOR],
[MANAGED MEMORY],
//[MULTI GPU BOARD],
//[MULTI GPU BOARD GROUP ID],
//[HOST NATIVE ATOMIC SUPPORTED],
//[SINGLE TO DOUBLE PRECISION PERF RATIO],
[PAGEABLE MEMORY ACCESS],
[CONCURRENT MANAGED ACCESS],
//[COMPUTE PREEMPTION SUPPORTED],
//[CAN USE HOST POINTER FOR REGISTERED MEM],
//[CAN USE STREAM MEM OPS],
//[CAN USE 64 BIT STREAM MEM OPS],
//[CAN USE STREAM WAIT VALUE NOR],
[COOPERATIVE LAUNCH],
[COOPERATIVE MULTI DEVICE LAUNCH],
//[MAX SHARED MEMORY PER BLOCK OPTIN],
//[CAN FLUSH REMOTE WRITES],
//[HOST REGISTER SUPPORTED],
[PAGEABLE MEMORY ACCESS USES HOST PAGE TABLES],
[DIRECT MANAGED MEM ACCESS FROM HOST],
//[VIRTUAL ADDRESS MANAGEMENT SUPPORTED],
//[VIRTUAL MEMORY MANAGEMENT SUPPORTED],
//[HANDLE TYPE POSIX FILE DESCRIPTOR SUPPORTED],
//[HANDLE TYPE WIN32 HANDLE SUPPORTED],
//[HANDLE TYPE WIN32 KMT HANDLE SUPPORTED],
//[MAX BLOCKS PER MULTIPROCESSOR],
//[GENERIC COMPRESSION SUPPORTED],
//[MAX PERSISTING L2 CACHE SIZE],
//[MAX ACCESS POLICY WINDOW SIZE],
//[GPU DIRECT RDMA WITH CUDA VMM SUPPORTED],
//[RESERVED SHARED MEMORY PER BLOCK],
//[SPARSE CUDA ARRAY SUPPORTED],
//[READ ONLY HOST REGISTER SUPPORTED],
//[TIMELINE SEMAPHORE INTEROP SUPPORTED],
//[MEMORY POOLS SUPPORTED],
},
};
unsafe { hipDeviceGetAttribute(pi, hip_attrib, dev_idx) }
}
pub fn get_uuid(uuid: *mut CUuuid_st, _dev_idx: c_int) -> Result<(), CUresult> {
unsafe {
*uuid = CUuuid_st {
bytes: mem::zeroed(),
}
};
Ok(())
}
// TODO: add support if Level 0 exposes it
pub fn get_luid(
luid: *mut c_char,
dev_node_mask: *mut c_uint,
_dev_idx: c_int,
) -> Result<(), CUresult> {
unsafe { ptr::write_bytes(luid, 0u8, 8) };
unsafe { *dev_node_mask = 0 };
Ok(())
}
pub(crate) unsafe fn get_properties(prop: *mut CUdevprop, dev: CUdevice) -> Result<(), hipError_t> {
if prop == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
}
let mut hip_props = mem::zeroed();
hip_call! { hipGetDeviceProperties(&mut hip_props, dev.0) };
(*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
(*prop).maxThreadsDim = hip_props.maxThreadsDim;
(*prop).maxGridSize = hip_props.maxGridSize;
(*prop).totalConstantMemory = usize::min(hip_props.totalConstMem, i32::MAX as usize) as i32;
(*prop).SIMDWidth = hip_props.warpSize;
(*prop).memPitch = usize::min(hip_props.memPitch, i32::MAX as usize) as i32;
(*prop).regsPerBlock = hip_props.regsPerBlock;
(*prop).clockRate = hip_props.clockRate;
(*prop).textureAlign = usize::min(hip_props.textureAlignment, i32::MAX as usize) as i32;
Ok(())
}

79
zluda/src/impl/driver.rs Normal file
View File

@ -0,0 +1,79 @@
use cuda_types::*;
use hip_runtime_sys::*;
use std::{
ffi::{CStr, CString},
mem, slice,
sync::OnceLock,
};
use crate::r#impl::context;
use super::LiveCheck;
pub(crate) struct GlobalState {
pub devices: Vec<Device>,
}
pub(crate) struct Device {
pub(crate) _comgr_isa: CString,
primary_context: LiveCheck<context::Context>,
}
impl Device {
pub(crate) fn primary_context<'a>(&'a self) -> (&'a context::Context, CUcontext) {
unsafe {
(
self.primary_context.data.assume_init_ref(),
self.primary_context.as_handle(),
)
}
}
}
pub(crate) fn device(dev: i32) -> Result<&'static Device, CUerror> {
global_state()?
.devices
.get(dev as usize)
.ok_or(CUerror::INVALID_DEVICE)
}
pub(crate) fn global_state() -> Result<&'static GlobalState, CUerror> {
static GLOBAL_STATE: OnceLock<Result<GlobalState, CUerror>> = OnceLock::new();
fn cast_slice<'a>(bytes: &'a [i8]) -> &'a [u8] {
unsafe { slice::from_raw_parts(bytes.as_ptr().cast(), bytes.len()) }
}
GLOBAL_STATE
.get_or_init(|| {
let mut device_count = 0;
unsafe { hipGetDeviceCount(&mut device_count) }?;
Ok(GlobalState {
devices: (0..device_count)
.map(|i| {
let mut props = unsafe { mem::zeroed() };
unsafe { hipGetDevicePropertiesR0600(&mut props, i) }?;
Ok::<_, CUerror>(Device {
_comgr_isa: CStr::from_bytes_until_nul(cast_slice(
&props.gcnArchName[..],
))
.map_err(|_| CUerror::UNKNOWN)?
.to_owned(),
primary_context: LiveCheck::new(context::new(i)),
})
})
.collect::<Result<Vec<_>, _>>()?,
})
})
.as_ref()
.map_err(|e| *e)
}
pub(crate) fn init(flags: ::core::ffi::c_uint) -> CUresult {
unsafe { hipInit(flags) }?;
global_state()?;
Ok(())
}
pub(crate) fn get_version(version: &mut ::core::ffi::c_int) -> CUresult {
*version = cuda_types::CUDA_VERSION as i32;
Ok(())
}

View File

@ -1,26 +1,46 @@
use hip_runtime_sys::{hipError_t, hipFuncAttribute, hipFuncGetAttribute, hipFuncGetAttributes, hipFunction_attribute, hipLaunchKernel, hipModuleLaunchKernel};
use super::{CUresult, HasLivenessCookie, LiveCheck};
use crate::cuda::{CUfunction, CUfunction_attribute, CUstream};
use ::std::os::raw::{c_uint, c_void};
use std::{mem, ptr};
use hip_runtime_sys::*;
pub(crate) fn get_attribute(
pi: *mut i32,
cu_attrib: CUfunction_attribute,
func: CUfunction,
pi: &mut i32,
cu_attrib: hipFunction_attribute,
func: hipFunction_t,
) -> hipError_t {
if pi == ptr::null_mut() || func == ptr::null_mut() {
return hipError_t::hipErrorInvalidValue;
// TODO: implement HIP_FUNC_ATTRIBUTE_PTX_VERSION
// TODO: implement HIP_FUNC_ATTRIBUTE_BINARY_VERSION
unsafe { hipFuncGetAttribute(pi, cu_attrib, func) }?;
if cu_attrib == hipFunction_attribute::HIP_FUNC_ATTRIBUTE_NUM_REGS {
*pi = (*pi).max(1);
}
Ok(())
}
pub(crate) fn launch_kernel(
f: hipFunction_t,
grid_dim_x: ::core::ffi::c_uint,
grid_dim_y: ::core::ffi::c_uint,
grid_dim_z: ::core::ffi::c_uint,
block_dim_x: ::core::ffi::c_uint,
block_dim_y: ::core::ffi::c_uint,
block_dim_z: ::core::ffi::c_uint,
shared_mem_bytes: ::core::ffi::c_uint,
stream: hipStream_t,
kernel_params: *mut *mut ::core::ffi::c_void,
extra: *mut *mut ::core::ffi::c_void,
) -> hipError_t {
// TODO: fix constants in extra
unsafe {
hipModuleLaunchKernel(
f,
grid_dim_x,
grid_dim_y,
grid_dim_z,
block_dim_x,
block_dim_y,
block_dim_z,
shared_mem_bytes,
stream,
kernel_params,
extra,
)
}
let attrib = match cu_attrib {
CUfunction_attribute::CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK => {
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
}
CUfunction_attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES => {
hipFunction_attribute::HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
}
_ => return hipError_t::hipErrorInvalidValue,
};
unsafe { hipFuncGetAttribute(pi, attrib, func as _) }
}

View File

@ -1,86 +0,0 @@
use std::{
ffi::{c_void, CStr},
mem, ptr, slice,
};
use hip_runtime_sys::{hipCtxGetDevice, hipError_t, hipGetDeviceProperties};
use crate::{
cuda::{CUjitInputType, CUjit_option, CUlinkState, CUresult},
hip_call,
};
use super::module::{self, SpirvModule};
struct LinkState {
modules: Vec<SpirvModule>,
result: Option<Vec<u8>>,
}
pub(crate) unsafe fn create(
num_options: u32,
options: *mut CUjit_option,
option_values: *mut *mut c_void,
state_out: *mut CUlinkState,
) -> CUresult {
if state_out == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
let state = Box::new(LinkState {
modules: Vec::new(),
result: None,
});
*state_out = mem::transmute(state);
CUresult::CUDA_SUCCESS
}
pub(crate) unsafe fn add_data(
state: CUlinkState,
type_: CUjitInputType,
data: *mut c_void,
size: usize,
name: *const i8,
num_options: u32,
options: *mut CUjit_option,
option_values: *mut *mut c_void,
) -> Result<(), hipError_t> {
if state == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
}
let state: *mut LinkState = mem::transmute(state);
let state = &mut *state;
// V-RAY specific hack
if state.modules.len() == 2 {
return Err(hipError_t::hipSuccess);
}
let spirv_data = SpirvModule::new_raw(data as *const _)?;
state.modules.push(spirv_data);
Ok(())
}
pub(crate) unsafe fn complete(
state: CUlinkState,
cubin_out: *mut *mut c_void,
size_out: *mut usize,
) -> Result<(), hipError_t> {
let mut dev = 0;
hip_call! { hipCtxGetDevice(&mut dev) };
let mut props = unsafe { mem::zeroed() };
hip_call! { hipGetDeviceProperties(&mut props, dev) };
let state: &mut LinkState = mem::transmute(state);
let spirv_bins = state.modules.iter().map(|m| &m.binaries[..]);
let should_link_ptx_impl = state.modules.iter().find_map(|m| m.should_link_ptx_impl);
let mut arch_binary = module::compile_amd(&props, spirv_bins, should_link_ptx_impl)
.map_err(|_| hipError_t::hipErrorUnknown)?;
let ptr = arch_binary.as_mut_ptr();
let size = arch_binary.len();
state.result = Some(arch_binary);
*cubin_out = ptr as _;
*size_out = size;
Ok(())
}
pub(crate) unsafe fn destroy(state: CUlinkState) -> CUresult {
let state: Box<LinkState> = mem::transmute(state);
CUresult::CUDA_SUCCESS
}

View File

@ -1,55 +1,35 @@
use hip_runtime_sys::{
hipDrvMemcpy3D, hipError_t, hipMemcpy3D, hipMemcpy3DParms, hipMemoryType, hipPitchedPtr,
hipPos, HIP_MEMCPY3D,
};
use std::ptr;
use hip_runtime_sys::*;
use crate::{
cuda::{CUDA_MEMCPY3D_st, CUdeviceptr, CUmemorytype, CUresult},
hip_call,
};
// TODO change HIP impl to 64 bits
pub(crate) unsafe fn copy_3d(cu_copy: *const CUDA_MEMCPY3D_st) -> Result<(), hipError_t> {
if cu_copy == ptr::null() {
return Err(hipError_t::hipErrorInvalidValue);
}
let cu_copy = *cu_copy;
let hip_copy = HIP_MEMCPY3D {
srcXInBytes: cu_copy.srcXInBytes as u32,
srcY: cu_copy.srcY as u32,
srcZ: cu_copy.srcZ as u32,
srcLOD: cu_copy.srcLOD as u32,
srcMemoryType: memory_type(cu_copy.srcMemoryType)?,
srcHost: cu_copy.srcHost,
srcDevice: cu_copy.srcDevice.0 as _,
srcArray: cu_copy.srcArray as _,
srcPitch: cu_copy.srcPitch as u32,
srcHeight: cu_copy.srcHeight as u32,
dstXInBytes: cu_copy.dstXInBytes as u32,
dstY: cu_copy.dstY as u32,
dstZ: cu_copy.dstZ as u32,
dstLOD: cu_copy.dstLOD as u32,
dstMemoryType: memory_type(cu_copy.dstMemoryType)?,
dstHost: cu_copy.dstHost,
dstDevice: cu_copy.dstDevice.0 as _,
dstArray: cu_copy.dstArray as _,
dstPitch: cu_copy.dstPitch as u32,
dstHeight: cu_copy.dstHeight as u32,
WidthInBytes: cu_copy.WidthInBytes as u32,
Height: cu_copy.Height as u32,
Depth: cu_copy.Depth as u32,
};
hip_call! { hipDrvMemcpy3D(&hip_copy) };
Ok(())
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
// TODO: parametrize for non-Geekbench
unsafe { hipMemsetD8(*dptr, 0, bytesize) }
}
pub(crate) fn memory_type(cu: CUmemorytype) -> Result<hipMemoryType, hipError_t> {
match cu {
CUmemorytype::CU_MEMORYTYPE_HOST => Ok(hipMemoryType::hipMemoryTypeHost),
CUmemorytype::CU_MEMORYTYPE_DEVICE => Ok(hipMemoryType::hipMemoryTypeDevice),
CUmemorytype::CU_MEMORYTYPE_ARRAY => Ok(hipMemoryType::hipMemoryTypeArray),
CUmemorytype::CU_MEMORYTYPE_UNIFIED => Ok(hipMemoryType::hipMemoryTypeUnified),
_ => Err(hipError_t::hipErrorInvalidValue),
}
pub(crate) fn free_v2(dptr: hipDeviceptr_t) -> hipError_t {
unsafe { hipFree(dptr.0) }
}
pub(crate) fn copy_dto_h_v2(
dst_host: *mut ::core::ffi::c_void,
src_device: hipDeviceptr_t,
byte_count: usize,
) -> hipError_t {
unsafe { hipMemcpyDtoH(dst_host, src_device, byte_count) }
}
pub(crate) fn copy_hto_d_v2(
dst_device: hipDeviceptr_t,
src_host: *const ::core::ffi::c_void,
byte_count: usize,
) -> hipError_t {
unsafe { hipMemcpyHtoD(dst_device, src_host.cast_mut(), byte_count) }
}
pub(crate) fn get_address_range_v2(
pbase: *mut hipDeviceptr_t,
psize: *mut usize,
dptr: hipDeviceptr_t,
) -> hipError_t {
unsafe { hipMemGetAddressRange(pbase, psize, dptr) }
}

View File

@ -1,230 +1,209 @@
use hip_runtime_sys::hipError_t;
use cuda_types::*;
use hip_runtime_sys::*;
use std::mem::{self, ManuallyDrop, MaybeUninit};
use crate::cuda::{CUctx_st, CUdevice, CUdeviceptr, CUfunc_st, CUmod_st, CUresult, CUstream_st};
use std::{
ffi::c_void,
mem::{self, ManuallyDrop},
os::raw::c_int,
ptr,
sync::Mutex,
sync::TryLockError,
};
#[cfg(test)]
#[macro_use]
pub mod test;
pub mod device;
pub mod export_table;
pub mod function;
#[cfg_attr(windows, path = "os_win.rs")]
#[cfg_attr(not(windows), path = "os_unix.rs")]
pub(crate) mod os;
pub(crate) mod module;
pub(crate) mod context;
pub(crate) mod memory;
pub(crate) mod link;
pub(crate) mod pointer;
pub(super) mod context;
pub(super) mod device;
pub(super) mod driver;
pub(super) mod function;
pub(super) mod memory;
pub(super) mod module;
pub(super) mod pointer;
#[cfg(debug_assertions)]
pub fn unimplemented() -> CUresult {
pub(crate) fn unimplemented() -> CUresult {
unimplemented!()
}
#[cfg(not(debug_assertions))]
pub fn unimplemented() -> CUresult {
CUresult::CUDA_ERROR_NOT_SUPPORTED
pub(crate) fn unimplemented() -> CUresult {
CUresult::ERROR_NOT_SUPPORTED
}
#[macro_export]
macro_rules! hip_call {
($expr:expr) => {
#[allow(unused_unsafe)]
{
let err = unsafe { $expr };
if err != hip_runtime_sys::hipError_t::hipSuccess {
return Result::Err(err);
pub(crate) trait FromCuda<'a, T>: Sized {
fn from_cuda(t: &'a T) -> Result<Self, CUerror>;
}
macro_rules! from_cuda_nop {
($($type_:ty),*) => {
$(
impl<'a> FromCuda<'a, $type_> for $type_ {
fn from_cuda(x: &'a $type_) -> Result<Self, CUerror> {
Ok(*x)
}
}
}
impl<'a> FromCuda<'a, *mut $type_> for &'a mut $type_ {
fn from_cuda(x: &'a *mut $type_) -> Result<Self, CUerror> {
match unsafe { x.as_mut() } {
Some(x) => Ok(x),
None => Err(CUerror::INVALID_VALUE),
}
}
}
)*
};
}
pub trait HasLivenessCookie: Sized {
macro_rules! from_cuda_transmute {
($($from:ty => $to:ty),*) => {
$(
impl<'a> FromCuda<'a, $from> for $to {
fn from_cuda(x: &'a $from) -> Result<Self, CUerror> {
Ok(unsafe { std::mem::transmute(*x) })
}
}
impl<'a> FromCuda<'a, *mut $from> for &'a mut $to {
fn from_cuda(x: &'a *mut $from) -> Result<Self, CUerror> {
match unsafe { x.cast::<$to>().as_mut() } {
Some(x) => Ok(x),
None => Err(CUerror::INVALID_VALUE),
}
}
}
impl<'a> FromCuda<'a, *mut $from> for * mut $to {
fn from_cuda(x: &'a *mut $from) -> Result<Self, CUerror> {
Ok(x.cast::<$to>())
}
}
)*
};
}
macro_rules! from_cuda_object {
($($type_:ty),*) => {
$(
impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for <$type_ as ZludaObject>::CudaHandle {
fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<<$type_ as ZludaObject>::CudaHandle, CUerror> {
Ok(*handle)
}
}
impl<'a> FromCuda<'a, *mut <$type_ as ZludaObject>::CudaHandle> for &'a mut <$type_ as ZludaObject>::CudaHandle {
fn from_cuda(handle: &'a *mut <$type_ as ZludaObject>::CudaHandle) -> Result<&'a mut <$type_ as ZludaObject>::CudaHandle, CUerror> {
match unsafe { handle.as_mut() } {
Some(x) => Ok(x),
None => Err(CUerror::INVALID_VALUE),
}
}
}
impl<'a> FromCuda<'a, <$type_ as ZludaObject>::CudaHandle> for &'a $type_ {
fn from_cuda(handle: &'a <$type_ as ZludaObject>::CudaHandle) -> Result<&'a $type_, CUerror> {
Ok(as_ref(handle).as_result()?)
}
}
)*
};
}
from_cuda_nop!(
*mut i8,
*mut i32,
*mut usize,
*const ::core::ffi::c_void,
*const ::core::ffi::c_char,
*mut ::core::ffi::c_void,
*mut *mut ::core::ffi::c_void,
i32,
u32,
usize,
cuda_types::CUdevprop,
CUdevice_attribute
);
from_cuda_transmute!(
CUuuid => hipUUID,
CUfunction => hipFunction_t,
CUfunction_attribute => hipFunction_attribute,
CUstream => hipStream_t,
CUpointer_attribute => hipPointer_attribute,
CUdeviceptr_v2 => hipDeviceptr_t
);
from_cuda_object!(module::Module, context::Context);
impl<'a> FromCuda<'a, CUlimit> for hipLimit_t {
fn from_cuda(limit: &'a CUlimit) -> Result<Self, CUerror> {
Ok(match *limit {
CUlimit::CU_LIMIT_STACK_SIZE => hipLimit_t::hipLimitStackSize,
CUlimit::CU_LIMIT_PRINTF_FIFO_SIZE => hipLimit_t::hipLimitPrintfFifoSize,
CUlimit::CU_LIMIT_MALLOC_HEAP_SIZE => hipLimit_t::hipLimitMallocHeapSize,
_ => return Err(CUerror::NOT_SUPPORTED),
})
}
}
pub(crate) trait ZludaObject: Sized + Send + Sync {
const COOKIE: usize;
const LIVENESS_FAIL: CUresult;
const LIVENESS_FAIL: CUerror = cuda_types::CUerror::INVALID_VALUE;
fn try_drop(&mut self) -> Result<(), CUresult>;
type CudaHandle: Sized;
fn drop_checked(&mut self) -> CUresult;
fn wrap(self) -> Self::CudaHandle {
unsafe { mem::transmute_copy(&LiveCheck::wrap(self)) }
}
}
// This struct is a best-effort check if wrapped value has been dropped,
// while it's inherently safe, its use coming from FFI is very unsafe
#[repr(C)]
pub struct LiveCheck<T: HasLivenessCookie> {
pub(crate) struct LiveCheck<T: ZludaObject> {
cookie: usize,
data: ManuallyDrop<T>,
data: MaybeUninit<T>,
}
impl<T: HasLivenessCookie> LiveCheck<T> {
pub fn new(data: T) -> Self {
impl<T: ZludaObject> LiveCheck<T> {
fn new(data: T) -> Self {
LiveCheck {
cookie: T::COOKIE,
data: ManuallyDrop::new(data),
data: MaybeUninit::new(data),
}
}
fn destroy_impl(this: *mut Self) -> Result<(), CUresult> {
let mut ctx_box = ManuallyDrop::new(unsafe { Box::from_raw(this) });
ctx_box.try_drop()?;
unsafe { ManuallyDrop::drop(&mut ctx_box) };
Ok(())
fn as_handle(&self) -> T::CudaHandle {
unsafe { mem::transmute_copy(&self) }
}
unsafe fn ptr_from_inner(this: *mut T) -> *mut Self {
let outer_ptr = (this as *mut u8).sub(mem::size_of::<usize>());
outer_ptr as *mut Self
fn wrap(data: T) -> *mut Self {
Box::into_raw(Box::new(Self::new(data)))
}
pub unsafe fn as_ref_unchecked(&self) -> &T {
&self.data
}
pub fn as_option_mut(&mut self) -> Option<&mut T> {
fn as_result(&self) -> Result<&T, CUerror> {
if self.cookie == T::COOKIE {
Some(&mut self.data)
} else {
None
}
}
pub fn as_result(&self) -> Result<&T, CUresult> {
if self.cookie == T::COOKIE {
Ok(&self.data)
} else {
Err(T::LIVENESS_FAIL)
}
}
pub fn as_result_mut(&mut self) -> Result<&mut T, CUresult> {
if self.cookie == T::COOKIE {
Ok(&mut self.data)
Ok(unsafe { self.data.assume_init_ref() })
} else {
Err(T::LIVENESS_FAIL)
}
}
// This looks like nonsense, but it's not. There are two cases:
// Err(CUerror) -> meaning that the object is invalid, this pointer does not point into valid memory
// Ok(maybe_error) -> meaning that the object is valid, we dropped everything, but there *might*
// an error in the underlying runtime that we want to propagate
#[must_use]
pub fn try_drop(&mut self) -> Result<(), CUresult> {
fn drop_checked(&mut self) -> Result<Result<(), CUerror>, CUerror> {
if self.cookie == T::COOKIE {
self.cookie = 0;
self.data.try_drop()?;
unsafe { ManuallyDrop::drop(&mut self.data) };
return Ok(());
}
Err(T::LIVENESS_FAIL)
}
}
impl<T: HasLivenessCookie> Drop for LiveCheck<T> {
fn drop(&mut self) {
self.cookie = 0;
}
}
pub trait CudaRepr: Sized {
type Impl: Sized;
}
impl<T: CudaRepr> CudaRepr for *mut T {
type Impl = *mut T::Impl;
}
pub trait Decuda<To> {
fn decuda(self: Self) -> To;
}
impl<T: CudaRepr> Decuda<*mut T::Impl> for *mut T {
fn decuda(self: Self) -> *mut T::Impl {
self as *mut _
}
}
impl<T> From<TryLockError<T>> for CUresult {
fn from(_: TryLockError<T>) -> Self {
CUresult::CUDA_ERROR_ILLEGAL_STATE
}
}
impl From<ocl_core::Error> for CUresult {
fn from(result: ocl_core::Error) -> Self {
match result {
_ => CUresult::CUDA_ERROR_UNKNOWN,
let result = unsafe { self.data.assume_init_mut().drop_checked() };
unsafe { MaybeUninit::assume_init_drop(&mut self.data) };
Ok(result)
} else {
Err(T::LIVENESS_FAIL)
}
}
}
impl From<hip_runtime_sys::hipError_t> for CUresult {
fn from(result: hip_runtime_sys::hipError_t) -> Self {
match result {
hip_runtime_sys::hipError_t::hipErrorRuntimeMemory
| hip_runtime_sys::hipError_t::hipErrorRuntimeOther => CUresult::CUDA_ERROR_UNKNOWN,
hip_runtime_sys::hipError_t(e) => CUresult(e),
}
}
pub fn as_ref<'a, T: ZludaObject>(
handle: &'a T::CudaHandle,
) -> &'a ManuallyDrop<Box<LiveCheck<T>>> {
unsafe { mem::transmute(handle) }
}
pub trait Encuda {
type To: Sized;
fn encuda(self: Self) -> Self::To;
}
impl Encuda for CUresult {
type To = CUresult;
fn encuda(self: Self) -> Self::To {
self
}
}
impl Encuda for () {
type To = CUresult;
fn encuda(self: Self) -> Self::To {
CUresult::CUDA_SUCCESS
}
}
impl<T1: Encuda<To = CUresult>, T2: Encuda<To = CUresult>> Encuda for Result<T1, T2> {
type To = CUresult;
fn encuda(self: Self) -> Self::To {
match self {
Ok(e) => e.encuda(),
Err(e) => e.encuda(),
}
}
}
impl Encuda for hipError_t {
type To = CUresult;
fn encuda(self: Self) -> Self::To {
self.into()
}
}
unsafe fn transmute_lifetime<'a, 'b, T: ?Sized>(t: &'a T) -> &'b T {
mem::transmute(t)
}
unsafe fn transmute_lifetime_mut<'a, 'b, T: ?Sized>(t: &'a mut T) -> &'b mut T {
mem::transmute(t)
}
pub fn driver_get_version() -> c_int {
i32::max_value()
}
impl<'a> CudaRepr for CUdeviceptr {
type Impl = *mut c_void;
}
impl Decuda<*mut c_void> for CUdeviceptr {
fn decuda(self) -> *mut c_void {
self.0 as *mut _
}
pub fn drop_checked<T: ZludaObject>(handle: T::CudaHandle) -> Result<(), CUerror> {
let mut wrapped_object: ManuallyDrop<Box<LiveCheck<T>>> =
unsafe { mem::transmute_copy(&handle) };
let underlying_error = LiveCheck::drop_checked(&mut wrapped_object)?;
unsafe { ManuallyDrop::drop(&mut wrapped_object) };
underlying_error
}

View File

@ -1,261 +1,53 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::fs::File;
use std::io::{self, Read, Write};
use std::ops::Add;
use std::os::raw::c_char;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::{env, fs, iter, mem, ptr, slice};
use super::ZludaObject;
use cuda_types::*;
use hip_runtime_sys::*;
use std::{ffi::CStr, mem};
use hip_runtime_sys::{
hipCtxGetCurrent, hipCtxGetDevice, hipDeviceGetAttribute, hipDeviceGetName, hipDeviceProp_t,
hipError_t, hipGetDeviceProperties, hipGetStreamDeviceId, hipModuleLoadData,
};
use tempfile::NamedTempFile;
use crate::cuda::CUmodule;
use crate::hip_call;
pub struct SpirvModule {
pub binaries: Vec<u32>,
pub kernel_info: HashMap<String, ptx::KernelInfo>,
pub should_link_ptx_impl: Option<(&'static [u8], &'static [u8])>,
pub build_options: CString,
pub(crate) struct Module {
base: hipModule_t,
}
impl SpirvModule {
pub fn new_raw<'a>(text: *const c_char) -> Result<Self, hipError_t> {
let u8_text = unsafe { CStr::from_ptr(text) };
let ptx_text = u8_text
.to_str()
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
Self::new(ptx_text)
}
impl ZludaObject for Module {
const COOKIE: usize = 0xe9138bd040487d4a;
pub fn new<'a>(ptx_text: &str) -> Result<Self, hipError_t> {
let mut errors = Vec::new();
let ast = ptx::ModuleParser::new()
.parse(&mut errors, ptx_text)
.map_err(|_| hipError_t::hipErrorInvalidImage)?;
if errors.len() > 0 {
return Err(hipError_t::hipErrorInvalidImage);
}
let spirv_module =
ptx::to_spirv_module(ast).map_err(|_| hipError_t::hipErrorInvalidImage)?;
Ok(SpirvModule {
binaries: spirv_module.assemble(),
kernel_info: spirv_module.kernel_info,
should_link_ptx_impl: spirv_module.should_link_ptx_impl,
build_options: spirv_module.build_options,
})
type CudaHandle = CUmodule;
fn drop_checked(&mut self) -> CUresult {
unsafe { hipModuleUnload(self.base) }?;
Ok(())
}
}
pub(crate) fn load(module: *mut CUmodule, fname: *const i8) -> Result<(), hipError_t> {
let file_name = unsafe { CStr::from_ptr(fname) }
pub(crate) fn load_data(module: &mut CUmodule, image: *const std::ffi::c_void) -> CUresult {
let text = unsafe { CStr::from_ptr(image.cast()) }
.to_str()
.map_err(|_| hipError_t::hipErrorInvalidValue)?;
let mut file = File::open(file_name).map_err(|_| hipError_t::hipErrorFileNotFound)?;
let mut file_buffer = Vec::new();
file.read_to_end(&mut file_buffer)
.map_err(|_| hipError_t::hipErrorUnknown)?;
let result = load_data(module, file_buffer.as_ptr() as _);
drop(file_buffer);
result
}
pub(crate) fn load_data(
module: *mut CUmodule,
image: *const std::ffi::c_void,
) -> Result<(), hipError_t> {
if image == ptr::null() {
return Err(hipError_t::hipErrorInvalidValue);
}
if unsafe { *(image as *const u32) } == 0x464c457f {
return match unsafe { hipModuleLoadData(module as _, image) } {
hipError_t::hipSuccess => Ok(()),
e => Err(e),
};
}
let spirv_data = SpirvModule::new_raw(image as *const _)?;
load_data_impl(module, spirv_data)
}
pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<(), hipError_t> {
.map_err(|_| CUerror::INVALID_VALUE)?;
let ast = ptx_parser::parse_module_checked(text).map_err(|_| CUerror::NO_BINARY_FOR_GPU)?;
let llvm_module = ptx::to_llvm_module(ast).map_err(|_| CUerror::UNKNOWN)?;
let mut dev = 0;
hip_call! { hipCtxGetDevice(&mut dev) };
unsafe { hipCtxGetDevice(&mut dev) }?;
let mut props = unsafe { mem::zeroed() };
hip_call! { hipGetDeviceProperties(&mut props, dev) };
let arch_binary = compile_amd(
&props,
iter::once(&spirv_data.binaries[..]),
spirv_data.should_link_ptx_impl,
unsafe { hipGetDevicePropertiesR0600(&mut props, dev) }?;
let elf_module = comgr::compile_bitcode(
unsafe { CStr::from_ptr(props.gcnArchName.as_ptr()) },
&*llvm_module.llvm_ir,
llvm_module.linked_bitcode(),
)
.map_err(|_| hipError_t::hipErrorUnknown)?;
hip_call! { hipModuleLoadData(pmod as _, arch_binary.as_ptr() as _) };
.map_err(|_| CUerror::UNKNOWN)?;
let mut hip_module = unsafe { mem::zeroed() };
unsafe { hipModuleLoadData(&mut hip_module, elf_module.as_ptr().cast()) }?;
*module = Module { base: hip_module }.wrap();
Ok(())
}
const LLVM_SPIRV: &'static str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
const AMDGPU: &'static str = "/opt/rocm/";
const AMDGPU_TARGET: &'static str = "amdgcn-amd-amdhsa";
const AMDGPU_BITCODE: [&'static str; 8] = [
"opencl.bc",
"ocml.bc",
"ockl.bc",
"oclc_correctly_rounded_sqrt_off.bc",
"oclc_daz_opt_on.bc",
"oclc_finite_only_off.bc",
"oclc_unsafe_math_off.bc",
"oclc_wavefrontsize64_off.bc",
];
const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
pub(crate) fn compile_amd<'a>(
device_pros: &hipDeviceProp_t,
spirv_il: impl Iterator<Item = &'a [u32]>,
ptx_lib: Option<(&'static [u8], &'static [u8])>,
) -> io::Result<Vec<u8>> {
let null_terminator = device_pros
.gcnArchName
.iter()
.position(|&x| x == 0)
.unwrap();
let gcn_arch_slice = unsafe {
slice::from_raw_parts(device_pros.gcnArchName.as_ptr() as _, null_terminator + 1)
};
let device_name =
if let Ok(Ok(name)) = CStr::from_bytes_with_nul(gcn_arch_slice).map(|x| x.to_str()) {
name
} else {
return Err(io::Error::new(io::ErrorKind::Other, ""));
};
let dir = tempfile::tempdir()?;
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
Ok(path) => Cow::Owned(path),
Err(_) => Cow::Borrowed(LLVM_SPIRV),
};
let llvm_files = spirv_il
.map(|spirv| {
let mut spirv_file = NamedTempFile::new_in(&dir)?;
let spirv_u8 = unsafe {
slice::from_raw_parts(
spirv.as_ptr() as *const u8,
spirv.len() * mem::size_of::<u32>(),
)
};
spirv_file.write_all(spirv_u8)?;
if cfg!(debug_assertions) {
persist_file(spirv_file.path())?;
}
let llvm = NamedTempFile::new_in(&dir)?;
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
//.arg("--spirv-debug")
.arg("-r")
.arg("-o")
.arg(llvm.path())
.arg(spirv_file.path())
.status()?;
assert!(to_llvm_cmd.success());
if cfg!(debug_assertions) {
persist_file(llvm.path())?;
}
Ok::<_, io::Error>(llvm)
})
.collect::<Result<Vec<_>, _>>()?;
let linked_binary = NamedTempFile::new_in(&dir)?;
let mut llvm_link = PathBuf::from(AMDGPU);
llvm_link.push("llvm");
llvm_link.push("bin");
llvm_link.push("llvm-link");
let mut linker_cmd = Command::new(&llvm_link);
linker_cmd
.arg("-o")
.arg(linked_binary.path())
.args(llvm_files.iter().map(|f| f.path()))
.args(get_bitcode_paths(device_name));
if cfg!(debug_assertions) {
linker_cmd.arg("-v");
}
let status = linker_cmd.status()?;
assert!(status.success());
if cfg!(debug_assertions) {
persist_file(linked_binary.path())?;
}
let mut ptx_lib_bitcode = NamedTempFile::new_in(&dir)?;
let compiled_binary = NamedTempFile::new_in(&dir)?;
let mut clang_exe = PathBuf::from(AMDGPU);
clang_exe.push("llvm");
clang_exe.push("bin");
clang_exe.push("clang");
let mut compiler_cmd = Command::new(&clang_exe);
compiler_cmd
.arg(format!("-mcpu={}", device_name))
.arg("-ffp-contract=off")
.arg("-nogpulib")
.arg("-mno-wavefrontsize64")
.arg("-O3")
.arg("-Xclang")
.arg("-O3")
.arg("-Xlinker")
.arg("--no-undefined")
.arg("-target")
.arg(AMDGPU_TARGET)
.arg("-o")
.arg(compiled_binary.path())
.arg("-x")
.arg("ir")
.arg(linked_binary.path());
if let Some((_, bitcode)) = ptx_lib {
ptx_lib_bitcode.write_all(bitcode)?;
compiler_cmd.arg(ptx_lib_bitcode.path());
};
if cfg!(debug_assertions) {
compiler_cmd.arg("-v");
}
let status = compiler_cmd.status()?;
assert!(status.success());
let mut result = Vec::new();
let compiled_bin_path = compiled_binary.path();
let mut compiled_binary = File::open(compiled_bin_path)?;
compiled_binary.read_to_end(&mut result)?;
if cfg!(debug_assertions) {
persist_file(compiled_bin_path)?;
}
Ok(result)
pub(crate) fn unload(hmod: CUmodule) -> CUresult {
super::drop_checked::<Module>(hmod)
}
fn persist_file(path: &Path) -> io::Result<()> {
let mut persistent = PathBuf::from("/tmp/zluda");
std::fs::create_dir_all(&persistent)?;
persistent.push(path.file_name().unwrap());
std::fs::copy(path, persistent)?;
Ok(())
}
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
let generic_paths = AMDGPU_BITCODE.iter().map(|x| {
let mut path = PathBuf::from(AMDGPU);
path.push("amdgcn");
path.push("bitcode");
path.push(x);
path
});
let suffix = if let Some(suffix_idx) = device_name.find(':') {
suffix_idx
} else {
device_name.len()
};
let mut additional_path = PathBuf::from(AMDGPU);
additional_path.push("amdgcn");
additional_path.push("bitcode");
additional_path.push(format!(
"{}{}{}",
AMDGPU_BITCODE_DEVICE_PREFIX,
&device_name[3..suffix],
".bc"
));
generic_paths.chain(std::iter::once(additional_path))
pub(crate) fn get_function(
hfunc: &mut hipFunction_t,
hmod: &Module,
name: *const ::core::ffi::c_char,
) -> hipError_t {
unsafe { hipModuleGetFunction(hfunc, hmod.base, name) }
}

View File

@ -1,53 +1,40 @@
use std::{ffi::c_void, mem, ptr};
use hip_runtime_sys::{hipError_t, hipMemoryType, hipPointerGetAttributes};
use crate::{
cuda::{CUdeviceptr, CUmemorytype, CUpointer_attribute},
hip_call,
};
use cuda_types::*;
use hip_runtime_sys::*;
use std::{ffi::c_void, ptr};
pub(crate) unsafe fn get_attribute(
data: *mut c_void,
attribute: CUpointer_attribute,
ptr: CUdeviceptr,
) -> Result<(), hipError_t> {
attribute: hipPointer_attribute,
ptr: hipDeviceptr_t,
) -> hipError_t {
if data == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
return hipError_t::ErrorInvalidValue;
}
let mut attribs = mem::zeroed();
hip_call! { hipPointerGetAttributes(&mut attribs, ptr.0 as _) };
match attribute {
CUpointer_attribute::CU_POINTER_ATTRIBUTE_CONTEXT => {
*(data as *mut _) = attribs.device;
// TODO: implement by getting device ordinal & allocation start,
// then go through every context for that device
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_CONTEXT => hipError_t::ErrorNotSupported,
hipPointer_attribute::HIP_POINTER_ATTRIBUTE_MEMORY_TYPE => {
let mut hip_result = hipMemoryType(0);
hipPointerGetAttribute(
(&mut hip_result as *mut hipMemoryType).cast::<c_void>(),
attribute,
ptr,
)?;
let cuda_result = memory_type(hip_result)?;
unsafe { *(data.cast()) = cuda_result };
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE => {
*(data as *mut _) = memory_type(attribs.memoryType)?;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_POINTER => {
*(data as *mut _) = attribs.devicePointer;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_HOST_POINTER => {
*(data as *mut _) = attribs.hostPointer;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_MANAGED => {
*(data as *mut _) = attribs.isManaged;
Ok(())
}
_ => Err(hipError_t::hipErrorNotSupported),
_ => unsafe { hipPointerGetAttribute(data, attribute, ptr) },
}
}
pub(crate) fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipError_t> {
fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipErrorCode_t> {
match cu {
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
_ => Err(hipError_t::hipErrorInvalidValue),
_ => Err(hipErrorCode_t::InvalidValue),
}
}

View File

@ -1,157 +0,0 @@
#![allow(non_snake_case)]
use crate::cuda as zluda;
use crate::cuda::CUstream;
use crate::cuda::CUuuid;
use crate::{
cuda::{CUdevice, CUdeviceptr},
r#impl::CUresult,
};
use ::std::{
ffi::c_void,
os::raw::{c_int, c_uint},
};
use cuda_driver_sys as cuda;
#[macro_export]
macro_rules! cuda_driver_test {
($func:ident) => {
paste! {
#[test]
fn [<$func _zluda>]() {
$func::<crate::r#impl::test::Zluda>()
}
#[test]
fn [<$func _cuda>]() {
$func::<crate::r#impl::test::Cuda>()
}
}
};
}
pub trait CudaDriverFns {
fn cuInit(flags: c_uint) -> CUresult;
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult;
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult;
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult;
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult;
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult;
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult;
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult;
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult;
fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult;
fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult;
fn cuMemFree_v2(mem: *mut c_void) -> CUresult;
fn cuStreamDestroy_v2(stream: CUstream) -> CUresult;
}
pub struct Zluda();
impl CudaDriverFns for Zluda {
fn cuInit(_flags: c_uint) -> CUresult {
zluda::cuInit(_flags as _)
}
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
zluda::cuCtxCreate_v2(pctx as *mut _, flags, CUdevice(dev))
}
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult {
zluda::cuCtxDestroy_v2(ctx as *mut _)
}
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult {
zluda::cuCtxPopCurrent_v2(pctx as *mut _)
}
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult {
zluda::cuCtxGetApiVersion(ctx as *mut _, version)
}
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult {
zluda::cuCtxGetCurrent(pctx as *mut _)
}
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
zluda::cuMemAlloc_v2(dptr as *mut _, bytesize)
}
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult {
zluda::cuDeviceGetUuid(uuid, CUdevice(dev))
}
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
zluda::cuDevicePrimaryCtxGetState(CUdevice(dev), flags, active)
}
fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult {
zluda::cuStreamGetCtx(hStream, pctx as _)
}
fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult {
zluda::cuStreamCreate(stream, flags)
}
fn cuMemFree_v2(dptr: *mut c_void) -> CUresult {
zluda::cuMemFree_v2(CUdeviceptr(dptr as _))
}
fn cuStreamDestroy_v2(stream: CUstream) -> CUresult {
zluda::cuStreamDestroy_v2(stream)
}
}
pub struct Cuda();
impl CudaDriverFns for Cuda {
fn cuInit(flags: c_uint) -> CUresult {
unsafe { CUresult(cuda::cuInit(flags) as c_uint) }
}
fn cuCtxCreate_v2(pctx: *mut *mut c_void, flags: c_uint, dev: c_int) -> CUresult {
unsafe { CUresult(cuda::cuCtxCreate_v2(pctx as *mut _, flags, dev) as c_uint) }
}
fn cuCtxDestroy_v2(ctx: *mut c_void) -> CUresult {
unsafe { CUresult(cuda::cuCtxDestroy_v2(ctx as *mut _) as c_uint) }
}
fn cuCtxPopCurrent_v2(pctx: *mut *mut c_void) -> CUresult {
unsafe { CUresult(cuda::cuCtxPopCurrent_v2(pctx as *mut _) as c_uint) }
}
fn cuCtxGetApiVersion(ctx: *mut c_void, version: *mut c_uint) -> CUresult {
unsafe { CUresult(cuda::cuCtxGetApiVersion(ctx as *mut _, version) as c_uint) }
}
fn cuCtxGetCurrent(pctx: *mut *mut c_void) -> CUresult {
unsafe { CUresult(cuda::cuCtxGetCurrent(pctx as *mut _) as c_uint) }
}
fn cuMemAlloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> CUresult {
unsafe { CUresult(cuda::cuMemAlloc_v2(dptr as *mut _, bytesize) as c_uint) }
}
fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: c_int) -> CUresult {
unsafe { CUresult(cuda::cuDeviceGetUuid(uuid as *mut _, dev) as c_uint) }
}
fn cuDevicePrimaryCtxGetState(dev: c_int, flags: *mut c_uint, active: *mut c_int) -> CUresult {
unsafe { CUresult(cuda::cuDevicePrimaryCtxGetState(dev, flags, active) as c_uint) }
}
fn cuStreamGetCtx(hStream: CUstream, pctx: *mut *mut c_void) -> CUresult {
unsafe { CUresult(cuda::cuStreamGetCtx(hStream as _, pctx as _) as c_uint) }
}
fn cuStreamCreate(stream: *mut CUstream, flags: c_uint) -> CUresult {
unsafe { CUresult(cuda::cuStreamCreate(stream as _, flags as _) as c_uint) }
}
fn cuMemFree_v2(mem: *mut c_void) -> CUresult {
unsafe { CUresult(cuda::cuMemFree_v2(mem as _) as c_uint) }
}
fn cuStreamDestroy_v2(stream: CUstream) -> CUresult {
unsafe { CUresult(cuda::cuStreamDestroy_v2(stream as _) as c_uint) }
}
}

View File

@ -1,13 +1,79 @@
#[macro_use]
extern crate lazy_static;
#[cfg(test)]
extern crate cuda_driver_sys;
#[cfg(test)]
#[macro_use]
extern crate paste;
extern crate ptx;
#[allow(warnings)]
pub mod cuda;
mod cuda_impl;
pub(crate) mod r#impl;
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::unimplemented()
}
)*
};
}
macro_rules! implemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
cuda_base::cuda_normalize_fn!( crate::r#impl::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda(&$arg_id)?),*)?;
Ok(())
}
)*
};
}
macro_rules! implemented_in_function {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
cuda_base::cuda_normalize_fn!( crate::r#impl::function::$fn_name ) ($(crate::r#impl::FromCuda::from_cuda(&$arg_id)?),*)?;
Ok(())
}
)*
};
}
cuda_base::cuda_function_declarations!(
unimplemented,
implemented <= [
cuCtxGetLimit,
cuCtxSetCurrent,
cuCtxSetLimit,
cuCtxSynchronize,
cuDeviceComputeCapability,
cuDeviceGet,
cuDeviceGetAttribute,
cuDeviceGetCount,
cuDeviceGetLuid,
cuDeviceGetName,
cuDevicePrimaryCtxRelease,
cuDevicePrimaryCtxRetain,
cuDeviceGetProperties,
cuDeviceGetUuid,
cuDeviceGetUuid_v2,
cuDeviceTotalMem_v2,
cuDriverGetVersion,
cuFuncGetAttribute,
cuInit,
cuMemAlloc_v2,
cuMemFree_v2,
cuMemcpyDtoH_v2,
cuMemcpyHtoD_v2,
cuModuleGetFunction,
cuModuleLoadData,
cuModuleUnload,
cuPointerGetAttribute,
cuMemGetAddressRange_v2,
],
implemented_in_function <= [
cuLaunchKernel,
]
);

12
zluda_bindgen/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "zluda_bindgen"
version = "0.1.0"
edition = "2021"
[dependencies]
bindgen = "0.70"
syn = { version = "2.0", features = ["full", "visit-mut"] }
proc-macro2 = "1.0.89"
quote = "1.0"
prettyplease = "0.2.25"
rustc-hash = "1.1.0"

View File

@ -0,0 +1,7 @@
#define __CUDA_API_VERSION_INTERNAL
#include <cuda.h>
#include <cudaProfiler.h>
#include <cudaGL.h>
#include <cudaEGL.h>
#include <vdpau/vdpau.h>
#include <cudaVDPAU.h>

703
zluda_bindgen/src/main.rs Normal file
View File

@ -0,0 +1,703 @@
use proc_macro2::Span;
use quote::{format_ident, quote, ToTokens};
use rustc_hash::{FxHashMap, FxHashSet};
use std::{collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr};
use syn::{
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path,
PathArguments, Signature, Type, TypePath, UseTree,
};
fn main() {
let crate_root = PathBuf::from_str(env!("CARGO_MANIFEST_DIR")).unwrap();
generate_hip_runtime(
&crate_root,
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
);
let cuda_header = bindgen::Builder::default()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
.header_contents("cuda_wrapper.h", include_str!("../build/cuda_wrapper.h"))
.allowlist_type("^CU.*")
.allowlist_function("^cu.*")
.allowlist_var("^CU.*")
.must_use_type("cudaError_enum")
.constified_enum("cudaError_enum")
.no_partialeq("CUDA_HOST_NODE_PARAMS_st")
.new_type_alias(r"^CUdeviceptr_v\d+$")
.new_type_alias(r"^CUcontext$")
.new_type_alias(r"^CUstream$")
.new_type_alias(r"^CUmodule$")
.new_type_alias(r"^CUfunction$")
.new_type_alias(r"^CUlibrary$")
.clang_args(["-I/usr/local/cuda/include"])
.generate()
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cuda_header).unwrap();
generate_functions(&crate_root, &["..", "cuda_base", "src", "cuda.rs"], &module);
generate_types(&crate_root, &["..", "cuda_types", "src", "lib.rs"], &module);
generate_display(
&crate_root,
&["..", "zluda_dump", "src", "format_generated.rs"],
"cuda_types",
&module,
)
}
fn generate_hip_runtime(output: &PathBuf, path: &[&str]) {
let hiprt_header = bindgen::Builder::default()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
.header("/opt/rocm/include/hip/hip_runtime_api.h")
.allowlist_type("^hip.*")
.allowlist_function("^hip.*")
.allowlist_var("^hip.*")
.must_use_type("hipError_t")
.constified_enum("hipError_t")
.new_type_alias("^hipDeviceptr_t$")
.new_type_alias("^hipStream_t$")
.new_type_alias("^hipModule_t$")
.new_type_alias("^hipFunction_t$")
.clang_args(["-I/opt/rocm/include", "-D__HIP_PLATFORM_AMD__"])
.generate()
.unwrap()
.to_string();
let mut module: syn::File = syn::parse_str(&hiprt_header).unwrap();
let mut converter = ConvertIntoRustResult {
type_: "hipError_t",
underlying_type: "hipError_t",
new_error_type: "hipErrorCode_t",
error_prefix: ("hipError", "Error"),
success: ("hipSuccess", "Success"),
constants: Vec::new(),
};
module.items = module
.items
.into_iter()
.filter_map(|item| match item {
Item::Const(const_) => converter.get_const(const_).map(Item::Const),
Item::Use(use_) => converter.get_use(use_).map(Item::Use),
Item::Type(type_) => converter.get_type(type_).map(Item::Type),
item => Some(item),
})
.collect::<Vec<_>>();
converter.flush(&mut module.items);
add_send_sync(
&mut module.items,
&[
"hipDeviceptr_t",
"hipStream_t",
"hipModule_t",
"hipFunction_t",
],
);
let mut output = output.clone();
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
}
fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
for type_ in arg {
let type_ = Ident::new(type_, Span::call_site());
items.extend([
parse_quote! {
unsafe impl Send for #type_ {}
},
parse_quote! {
unsafe impl Sync for #type_ {}
},
]);
}
}
fn generate_functions(output: &PathBuf, path: &[&str], module: &syn::File) {
let fns_ = module.items.iter().filter_map(|item| match item {
Item::ForeignMod(extern_) => match &*extern_.items {
[ForeignItem::Fn(fn_)] => Some(fn_),
_ => unreachable!(),
},
_ => None,
});
let mut module: syn::File = parse_quote! {
extern "system" {
#(#fns_)*
}
};
syn::visit_mut::visit_file_mut(&mut PrependCudaPath, &mut module);
syn::visit_mut::visit_file_mut(&mut RemoveVisibility, &mut module);
syn::visit_mut::visit_file_mut(&mut ExplicitReturnType, &mut module);
let mut output = output.clone();
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
}
fn generate_types(output: &PathBuf, path: &[&str], module: &syn::File) {
let mut module = module.clone();
let mut converter = ConvertIntoRustResult {
type_: "CUresult",
underlying_type: "cudaError_enum",
new_error_type: "CUerror",
error_prefix: ("CUDA_ERROR_", "ERROR_"),
success: ("CUDA_SUCCESS", "SUCCESS"),
constants: Vec::new(),
};
module.items = module
.items
.into_iter()
.filter_map(|item| match item {
Item::ForeignMod(_) => None,
Item::Const(const_) => converter.get_const(const_).map(Item::Const),
Item::Use(use_) => converter.get_use(use_).map(Item::Use),
Item::Type(type_) => converter.get_type(type_).map(Item::Type),
Item::Struct(mut struct_) => {
let ident_string = struct_.ident.to_string();
match &*ident_string {
"CUdeviceptr_v2" => {
struct_.fields = Fields::Unnamed(parse_quote! {
(pub *mut ::core::ffi::c_void)
});
}
"CUuuid_st" => {
struct_.fields = Fields::Named(parse_quote! {
{pub bytes: [::core::ffi::c_uchar; 16usize]}
});
}
_ => {}
}
Some(Item::Struct(struct_))
}
item => Some(item),
})
.collect::<Vec<_>>();
converter.flush(&mut module.items);
module.items.push(parse_quote! {
impl From<hip_runtime_sys::hipErrorCode_t> for CUerror {
fn from(error: hip_runtime_sys::hipErrorCode_t) -> Self {
Self(error.0)
}
}
});
add_send_sync(
&mut module.items,
&[
"CUdeviceptr",
"CUcontext",
"CUstream",
"CUmodule",
"CUfunction",
"CUlibrary",
],
);
syn::visit_mut::visit_file_mut(&mut FixAbi, &mut module);
let mut output = output.clone();
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
}
fn write_rust_to_file(path: impl AsRef<std::path::Path>, content: &str) {
let mut file = File::create(path).unwrap();
file.write("// Generated automatically by zluda_bindgen\n// DO NOT EDIT MANUALLY\n#![allow(warnings)]\n".as_bytes())
.unwrap();
file.write(content.as_bytes()).unwrap();
}
struct ConvertIntoRustResult {
type_: &'static str,
underlying_type: &'static str,
new_error_type: &'static str,
error_prefix: (&'static str, &'static str),
success: (&'static str, &'static str),
constants: Vec<syn::ItemConst>,
}
impl ConvertIntoRustResult {
fn get_const(&mut self, const_: syn::ItemConst) -> Option<syn::ItemConst> {
let name = const_.ident.to_string();
if name.starts_with(self.underlying_type) {
self.constants.push(const_);
None
} else {
Some(const_)
}
}
fn get_use(&mut self, use_: ItemUse) -> Option<ItemUse> {
if let UseTree::Path(ref path) = use_.tree {
if let UseTree::Rename(ref rename) = &*path.tree {
if rename.rename == self.type_ {
return None;
}
}
}
Some(use_)
}
fn flush(self, items: &mut Vec<Item>) {
let type_ = format_ident!("{}", self.type_);
let type_trait = format_ident!("{}Consts", self.type_);
let new_error_type = format_ident!("{}", self.new_error_type);
let success = format_ident!("{}", self.success.1);
let mut result_variants = Vec::new();
let mut error_variants = Vec::new();
for const_ in self.constants.iter() {
let ident = const_.ident.to_string();
if ident.ends_with(self.success.0) {
result_variants.push(quote! {
const #success: #type_ = #type_::Ok(());
});
} else {
let old_prefix_len = self.underlying_type.len() + 1 + self.error_prefix.0.len();
let variant_ident =
format_ident!("{}{}", self.error_prefix.1, &ident[old_prefix_len..]);
let error_ident = format_ident!("{}", &ident[old_prefix_len..]);
let expr = &const_.expr;
result_variants.push(quote! {
const #variant_ident: #type_ = #type_::Err(#new_error_type::#error_ident);
});
error_variants.push(quote! {
pub const #error_ident: #new_error_type = #new_error_type(unsafe { ::core::num::NonZeroU32::new_unchecked(#expr) });
});
}
}
let extra_items: Punctuated<syn::Item, syn::parse::Nothing> = parse_quote! {
impl #new_error_type {
#(#error_variants)*
}
#[repr(transparent)]
#[derive(Debug, Hash, Copy, Clone, PartialEq, Eq)]
pub struct #new_error_type(pub ::core::num::NonZeroU32);
pub trait #type_trait {
#(#result_variants)*
}
impl #type_trait for #type_ {}
#[must_use]
pub type #type_ = ::core::result::Result<(), #new_error_type>;
const _: fn() = || {
let _ = std::mem::transmute::<#type_, u32>;
};
};
items.extend(extra_items);
}
fn get_type(&self, type_: syn::ItemType) -> Option<syn::ItemType> {
if type_.ident.to_string() == self.type_ {
None
} else {
Some(type_)
}
}
}
struct FixAbi;
impl VisitMut for FixAbi {
fn visit_abi_mut(&mut self, i: &mut Abi) {
if let Some(ref mut name) = i.name {
*name = LitStr::new("system", Span::call_site());
}
}
}
struct PrependCudaPath;
impl VisitMut for PrependCudaPath {
fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
if type_.path.segments.len() == 1 {
match &*type_.path.segments[0].ident.to_string() {
"usize" | "f64" | "f32" => {}
_ => {
*type_ = parse_quote! { cuda_types :: #type_ };
}
}
}
}
}
struct RemoveVisibility;
impl VisitMut for RemoveVisibility {
fn visit_visibility_mut(&mut self, i: &mut syn::Visibility) {
*i = syn::Visibility::Inherited;
}
}
struct ExplicitReturnType;
impl VisitMut for ExplicitReturnType {
fn visit_return_type_mut(&mut self, i: &mut syn::ReturnType) {
if let syn::ReturnType::Default = i {
*i = parse_quote! { -> {} };
}
}
}
fn generate_display(
output: &PathBuf,
path: &[&str],
types_crate: &'static str,
module: &syn::File,
) {
let ignore_types = [
"CUdevice",
"CUdeviceptr_v1",
"CUarrayMapInfo_st",
"CUDA_RESOURCE_DESC_st",
"CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st",
"CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st",
"CUexecAffinityParam_st",
"CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st",
"CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st",
"CUuuid_st",
"HGPUNV",
"EGLint",
"EGLSyncKHR",
"EGLImageKHR",
"EGLStreamKHR",
"CUasyncNotificationInfo_st",
"CUgraphNodeParams_st",
"CUeglFrame_st",
"CUdevResource_st",
"CUlaunchAttribute_st",
"CUlaunchConfig_st",
];
let ignore_functions = [
"cuGLGetDevices",
"cuGLGetDevices_v2",
"cuStreamSetAttribute",
"cuStreamSetAttribute_ptsz",
"cuStreamGetAttribute",
"cuStreamGetAttribute_ptsz",
"cuGraphKernelNodeGetAttribute",
"cuGraphKernelNodeSetAttribute",
];
let count_selectors = [
("cuCtxCreate_v3", 1, 2),
("cuMemMapArrayAsync", 0, 1),
("cuMemMapArrayAsync_ptsz", 0, 1),
("cuStreamBatchMemOp", 2, 1),
("cuStreamBatchMemOp_ptsz", 2, 1),
("cuStreamBatchMemOp_v2", 2, 1),
];
let mut derive_state = DeriveDisplayState::new(
&ignore_types,
types_crate,
&ignore_functions,
&count_selectors,
);
let mut items = module
.items
.iter()
.filter_map(|i| cuda_derive_display_trait_for_item(&mut derive_state, i))
.collect::<Vec<_>>();
items.push(curesult_display_trait(&derive_state));
let mut output = output.clone();
output.extend(path);
write_rust_to_file(
output,
&prettyplease::unparse(&syn::File {
shebang: None,
attrs: Vec::new(),
items,
}),
);
}
struct DeriveDisplayState<'a> {
types_crate: &'static str,
ignore_types: FxHashSet<Ident>,
ignore_fns: FxHashSet<Ident>,
enums: FxHashMap<&'a Ident, Vec<&'a Ident>>,
array_arguments: FxHashMap<(Ident, usize), usize>,
result_variants: Vec<&'a ItemConst>,
}
impl<'a> DeriveDisplayState<'a> {
fn new(
ignore_types: &[&'static str],
types_crate: &'static str,
ignore_fns: &[&'static str],
count_selectors: &[(&'static str, usize, usize)],
) -> Self {
DeriveDisplayState {
types_crate,
ignore_types: ignore_types
.into_iter()
.map(|x| Ident::new(x, Span::call_site()))
.collect(),
ignore_fns: ignore_fns
.into_iter()
.map(|x| Ident::new(x, Span::call_site()))
.collect(),
array_arguments: count_selectors
.into_iter()
.map(|(name, val, count)| ((Ident::new(name, Span::call_site()), *val), *count))
.collect(),
enums: Default::default(),
result_variants: Vec::new(),
}
}
fn record_enum_variant(&mut self, enum_: &'a Ident, variant: &'a Ident) {
match self.enums.entry(enum_) {
hash_map::Entry::Occupied(mut entry) => {
entry.get_mut().push(variant);
}
hash_map::Entry::Vacant(entry) => {
entry.insert(vec![variant]);
}
}
}
}
fn cuda_derive_display_trait_for_item<'a>(
state: &mut DeriveDisplayState<'a>,
item: &'a Item,
) -> Option<syn::Item> {
let path_prefix = Path::from(Ident::new(state.types_crate, Span::call_site()));
let path_prefix_iter = iter::repeat(&path_prefix);
match item {
Item::Const(const_) => {
if const_.ty.to_token_stream().to_string() == "cudaError_enum" {
state.result_variants.push(const_);
}
None
}
Item::ForeignMod(ItemForeignMod { items, .. }) => match items.last().unwrap() {
ForeignItem::Fn(ForeignItemFn {
sig: Signature { ident, inputs, .. },
..
}) => {
if state.ignore_fns.contains(ident) {
return None;
}
let inputs = inputs
.iter()
.map(|fn_arg| {
let mut fn_arg = fn_arg.clone();
syn::visit_mut::visit_fn_arg_mut(&mut PrependCudaPath, &mut fn_arg);
fn_arg
})
.collect::<Vec<_>>();
let inputs_iter = inputs.iter();
let original_fn_name = ident.to_string();
let mut write_argument = inputs.iter().enumerate().map(|(index, fn_arg)| {
let name = fn_arg_name(fn_arg);
if let Some(length_index) = state.array_arguments.get(&(ident.clone(), index)) {
let length = fn_arg_name(&inputs[*length_index]);
quote! {
writer.write_all(concat!(stringify!(#name), ": ").as_bytes())?;
writer.write_all(b"[")?;
for i in 0..#length {
if i != 0 {
writer.write_all(b", ")?;
}
crate::format::CudaDisplay::write(unsafe { &*#name.add(i as usize) }, #original_fn_name, arg_idx, writer)?;
}
writer.write_all(b"]")?;
}
} else {
quote! {
writer.write_all(concat!(stringify!(#name), ": ").as_bytes())?;
crate::format::CudaDisplay::write(&#name, #original_fn_name, arg_idx, writer)?;
}
}
});
let fn_name = format_ident!("write_{}", ident);
Some(match write_argument.next() {
Some(first_write_argument) => parse_quote! {
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized), #(#inputs_iter,)*) -> std::io::Result<()> {
let mut arg_idx = 0usize;
writer.write_all(b"(")?;
#first_write_argument
#(
arg_idx += 1;
writer.write_all(b", ")?;
#write_argument
)*
writer.write_all(b")")
}
},
None => parse_quote! {
pub fn #fn_name(writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
writer.write_all(b"()")
}
},
})
}
_ => unreachable!(),
},
Item::Impl(ref item_impl) => {
let enum_ = match &*item_impl.self_ty {
Type::Path(ref path) => &path.path.segments.last().unwrap().ident,
_ => unreachable!(),
};
let variant_ = match item_impl.items.last().unwrap() {
syn::ImplItem::Const(item_const) => &item_const.ident,
_ => unreachable!(),
};
state.record_enum_variant(enum_, variant_);
None
}
Item::Struct(item_struct) => {
if state.ignore_types.contains(&item_struct.ident) {
return None;
}
if state.enums.contains_key(&item_struct.ident) {
let enum_ = &item_struct.ident;
let enum_iter = iter::repeat(&item_struct.ident);
let variants = state.enums.get(&item_struct.ident).unwrap().iter();
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #enum_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
match self {
#(& #path_prefix_iter :: #enum_iter :: #variants => writer.write_all(stringify!(#variants).as_bytes()),)*
_ => write!(writer, "{}", self.0)
}
}
}
})
} else {
let struct_ = &item_struct.ident;
match item_struct.fields {
Fields::Named(ref fields) => {
let mut rest_of_fields = fields.named.iter().filter_map(|f| {
let f_ident = f.ident.as_ref().unwrap();
let name = f_ident.to_string();
if name.starts_with("reserved") || name == "_unused" {
None
} else {
Some(f_ident)
}
});
let first_field = match rest_of_fields.next() {
Some(f) => f,
None => return None,
};
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
writer.write_all(concat!("{ ", stringify!(#first_field), ": ").as_bytes())?;
crate::format::CudaDisplay::write(&self.#first_field, "", 0, writer)?;
#(
writer.write_all(concat!(", ", stringify!(#rest_of_fields), ": ").as_bytes())?;
crate::format::CudaDisplay::write(&self.#rest_of_fields, "", 0, writer)?;
)*
writer.write_all(b" }")
}
}
})
}
Fields::Unnamed(FieldsUnnamed { ref unnamed, .. }) if unnamed.len() == 1 => {
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #struct_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", self.0)
}
}
})
}
_ => return None,
}
}
}
Item::Type(item_type) => {
if state.ignore_types.contains(&item_type.ident) {
return None;
};
match &*item_type.ty {
Type::Ptr(_) => {
let type_ = &item_type.ident;
Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", *self)
}
}
})
}
Type::Path(type_path) => {
if type_path.path.leading_colon.is_some() {
let option_seg = type_path.path.segments.last().unwrap();
if option_seg.ident == "Option" {
match &option_seg.arguments {
PathArguments::AngleBracketed(generic) => match generic.args[0] {
syn::GenericArgument::Type(Type::BareFn(_)) => {
let type_ = &item_type.ident;
return Some(parse_quote! {
impl crate::format::CudaDisplay for #path_prefix :: #type_ {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
write!(writer, "{:p}", unsafe { std::mem::transmute::<#path_prefix :: #type_, *mut ::std::ffi::c_void>(*self) })
}
}
});
}
_ => unreachable!(),
},
_ => unreachable!(),
}
}
}
None
}
_ => unreachable!(),
}
}
Item::Union(_) => None,
Item::Use(_) => None,
_ => unreachable!(),
}
}
fn fn_arg_name(fn_arg: &FnArg) -> &Box<syn::Pat> {
let name = if let FnArg::Typed(t) = fn_arg {
&t.pat
} else {
unreachable!()
};
name
}
fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
let errors = derive_state.result_variants.iter().filter_map(|const_| {
let prefix = "cudaError_enum_";
let text = &const_.ident.to_string()[prefix.len()..];
if text == "CUDA_SUCCESS" {
return None;
}
let expr = &const_.expr;
Some(quote! {
#expr => writer.write_all(#text.as_bytes()),
})
});
parse_quote! {
impl crate::format::CudaDisplay for cuda_types::CUresult {
fn write(&self, _fn_name: &'static str, _index: usize, writer: &mut (impl std::io::Write + ?Sized)) -> std::io::Result<()> {
match self {
Ok(()) => writer.write_all(b"CUDA_SUCCESS"),
Err(err) => {
match err.0.get() {
#(#errors)*
err => write!(writer, "{}", err)
}
}
}
}
}
}
}

View File

@ -2,7 +2,7 @@
name = "zluda_dump"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[lib]
name = "zluda_dump"
@ -10,6 +10,7 @@ crate-type = ["cdylib"]
[dependencies]
ptx = { path = "../ptx" }
ptx_parser = { path = "../ptx_parser" }
lz4-sys = "1.9"
regex = "1.4"
dynasm = "1.2"

View File

@ -28,6 +28,7 @@ impl Hash for CUuuidWrapper {
}
}
#[allow(improper_ctypes_definitions)]
pub(crate) struct OriginalExports {
original_get_module_from_cubin: Option<
unsafe extern "system" fn(
@ -356,6 +357,7 @@ unsafe fn record_submodules_from_fatbin(
);
}
#[allow(improper_ctypes_definitions)]
unsafe extern "system" fn get_module_from_cubin(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
@ -388,6 +390,7 @@ unsafe extern "system" fn get_module_from_cubin(
)
}
#[allow(improper_ctypes_definitions)]
unsafe extern "system" fn get_module_from_cubin_ext1(
module: *mut CUmodule,
fatbinc_wrapper: *const FatbincWrapper,
@ -451,6 +454,7 @@ unsafe extern "system" fn get_module_from_cubin_ext1(
)
}
#[allow(improper_ctypes_definitions)]
unsafe extern "system" fn get_module_from_cubin_ext2(
fatbin_header: *const FatbinHeader,
module: *mut CUmodule,
@ -508,7 +512,7 @@ unsafe extern "system" fn get_module_from_cubin_ext2(
.original_get_module_from_cubin_ext2
.unwrap()(fatbin_header, module, ptr1, ptr2, _unknown);
fn_logger.result = Some(result);
if result != CUresult::CUDA_SUCCESS {
if result.is_err() {
return result;
}
record_submodules_from_fatbin(

View File

@ -1,11 +1,10 @@
use cuda_types::{CUGLDeviceList, CUdevice};
use std::{
ffi::{c_void, CStr},
fmt::LowerHex,
mem, ptr, slice,
};
use cuda_base::cuda_derive_display_trait;
pub(crate) trait CudaDisplay {
fn write(
&self,
@ -27,28 +26,6 @@ impl CudaDisplay for cuda_types::CUuuid {
}
}
impl CudaDisplay for cuda_types::CUdevice {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(writer, "{}", self.0)
}
}
impl CudaDisplay for cuda_types::CUdeviceptr {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(writer, "{:p}", self.0)
}
}
impl CudaDisplay for cuda_types::CUdeviceptr_v1 {
fn write(
&self,
@ -494,6 +471,59 @@ impl CudaDisplay
}
}
impl CudaDisplay for cuda_types::CUgraphNodeParams_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl CudaDisplay for cuda_types::CUlaunchConfig_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl CudaDisplay for cuda_types::CUeglFrame_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl CudaDisplay for cuda_types::CUdevResource_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl CudaDisplay for cuda_types::CUlaunchAttribute_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl<T: CudaDisplay> CudaDisplay for *mut T {
fn write(
&self,
@ -544,34 +574,26 @@ impl<T: CudaDisplay, const N: usize> CudaDisplay for [T; N] {
}
}
#[allow(non_snake_case)]
pub fn write_cuStreamBatchMemOp(
writer: &mut (impl std::io::Write + ?Sized),
stream: cuda_types::CUstream,
count: ::std::os::raw::c_uint,
paramArray: *mut cuda_types::CUstreamBatchMemOpParams,
flags: ::std::os::raw::c_uint,
) -> std::io::Result<()> {
writer.write_all(b"(stream: ")?;
CudaDisplay::write(&stream, "cuStreamBatchMemOp", 0, writer)?;
writer.write_all(b", ")?;
writer.write_all(b"count: ")?;
CudaDisplay::write(&count, "cuStreamBatchMemOp", 1, writer)?;
writer.write_all(b", paramArray: [")?;
for i in 0..count {
if i != 0 {
writer.write_all(b", ")?;
}
CudaDisplay::write(
&unsafe { paramArray.add(i as usize) },
"cuStreamBatchMemOp",
2,
writer,
)?;
impl CudaDisplay for cuda_types::CUarrayMapInfo_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
}
impl CudaDisplay for cuda_types::CUexecAffinityParam_st {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
_writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
todo!()
}
writer.write_all(b"], flags: ")?;
CudaDisplay::write(&flags, "cuStreamBatchMemOp", 3, writer)?;
writer.write_all(b") ")
}
#[allow(non_snake_case)]
@ -585,27 +607,7 @@ pub fn write_cuGraphKernelNodeGetAttribute(
CudaDisplay::write(&hNode, "cuGraphKernelNodeGetAttribute", 0, writer)?;
writer.write_all(b", attr: ")?;
CudaDisplay::write(&attr, "cuGraphKernelNodeGetAttribute", 1, writer)?;
match attr {
cuda_types::CUkernelNodeAttrID::CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).accessPolicyWindow },
"cuGraphKernelNodeGetAttribute",
2,
writer,
)?;
}
cuda_types::CUkernelNodeAttrID::CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).cooperative },
"cuGraphKernelNodeGetAttribute",
2,
writer,
)?;
}
_ => return writer.write_all(b", ...) "),
}
write_launch_attribute(writer, "cuGraphKernelNodeGetAttribute", 2, attr, value_out)?;
writer.write_all(b") ")
}
@ -630,28 +632,73 @@ pub fn write_cuStreamGetAttribute(
CudaDisplay::write(&hStream, "cuStreamGetAttribute", 0, writer)?;
writer.write_all(b", attr: ")?;
CudaDisplay::write(&attr, "cuStreamGetAttribute", 1, writer)?;
match attr {
cuda_types::CUstreamAttrID::CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
write_launch_attribute(writer, "cuStreamGetAttribute", 2, attr, value_out)?;
writer.write_all(b") ")
}
fn write_launch_attribute(
writer: &mut (impl std::io::Write + ?Sized),
fn_name: &'static str,
index: usize,
attribute: cuda_types::CUlaunchAttributeID,
value_out: *mut cuda_types::CUstreamAttrValue,
) -> std::io::Result<()> {
match attribute {
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).accessPolicyWindow },
"cuStreamGetAttribute",
2,
fn_name,
index,
writer,
)?;
)
}
cuda_types::CUstreamAttrID::CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_COOPERATIVE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(
unsafe { &(*value_out).syncPolicy },
"cuStreamGetAttribute",
2,
writer,
)?;
CudaDisplay::write(unsafe { &(*value_out).cooperative }, fn_name, index, writer)
}
_ => return writer.write_all(b", ...) "),
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).syncPolicy }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).clusterDim }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).clusterSchedulingPolicyPreference }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).programmaticStreamSerializationAllowed }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).programmaticEvent }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).priority }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).memSyncDomainMap }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).memSyncDomain }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).launchCompletionEvent }, fn_name, index, writer)
}
cuda_types::CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).deviceUpdatableKernelNode }, fn_name, index, writer)
}
_ => writer.write_all(b", ... "),
}
writer.write_all(b") ")
}
#[allow(non_snake_case)]
@ -685,71 +732,27 @@ pub fn write_cuStreamSetAttribute_ptsz(
}
#[allow(non_snake_case)]
pub fn write_cuCtxCreate_v3(
pub fn write_cuGLGetDevices(
_writer: &mut (impl std::io::Write + ?Sized),
_pctx: *mut cuda_types::CUcontext,
_paramsArray: *mut cuda_types::CUexecAffinityParam,
_numParams: ::std::os::raw::c_int,
_flags: ::std::os::raw::c_uint,
_dev: cuda_types::CUdevice,
_pCudaDeviceCount: *mut ::std::os::raw::c_uint,
_pCudaDevices: *mut CUdevice,
_cudaDeviceCount: ::std::os::raw::c_uint,
_deviceList: CUGLDeviceList,
) -> std::io::Result<()> {
todo!()
}
#[allow(non_snake_case)]
pub fn write_cuCtxGetExecAffinity(
pub fn write_cuGLGetDevices_v2(
_writer: &mut (impl std::io::Write + ?Sized),
_pExecAffinity: *mut cuda_types::CUexecAffinityParam,
_type_: cuda_types::CUexecAffinityType,
_pCudaDeviceCount: *mut ::std::os::raw::c_uint,
_pCudaDevices: *mut CUdevice,
_cudaDeviceCount: ::std::os::raw::c_uint,
_deviceList: CUGLDeviceList,
) -> std::io::Result<()> {
todo!()
}
#[allow(non_snake_case)]
pub fn write_cuMemMapArrayAsync(
_writer: &mut (impl std::io::Write + ?Sized),
_mapInfoList: *mut cuda_types::CUarrayMapInfo,
_count: ::std::os::raw::c_uint,
_hStream: cuda_types::CUstream,
) -> std::io::Result<()> {
todo!()
}
#[allow(non_snake_case)]
pub fn write_cuMemMapArrayAsync_ptsz(
writer: &mut (impl std::io::Write + ?Sized),
mapInfoList: *mut cuda_types::CUarrayMapInfo,
count: ::std::os::raw::c_uint,
hStream: cuda_types::CUstream,
) -> std::io::Result<()> {
write_cuMemMapArrayAsync(writer, mapInfoList, count, hStream)
}
cuda_derive_display_trait!(
cuda_types,
CudaDisplay,
[
CUarrayMapInfo_st,
CUDA_RESOURCE_DESC_st,
CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st,
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st,
CUexecAffinityParam_st,
CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
CUstreamBatchMemOpParams_union_CUstreamMemOpWriteValueParams_st,
CUuuid_st,
HGPUNV
],
[
cuCtxCreate_v3,
cuCtxGetExecAffinity,
cuGraphKernelNodeGetAttribute,
cuGraphKernelNodeSetAttribute,
cuMemMapArrayAsync,
cuMemMapArrayAsync_ptsz,
cuStreamBatchMemOp,
cuStreamGetAttribute,
cuStreamGetAttribute_ptsz,
cuStreamSetAttribute,
cuStreamSetAttribute_ptsz
]
);
#[path = "format_generated.rs"]
mod format_generated;
pub(crate) use format_generated::*;

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,18 @@
use cuda_types::{
CUdevice, CUdevice_attribute, CUfunction, CUjit_option, CUmodule, CUresult, CUuuid,
};
use cuda_types::*;
use paste::paste;
use side_by_side::CudaDynamicFns;
use std::io;
use std::{
collections::HashMap, env, error::Error, ffi::c_void, fs, path::PathBuf, ptr::NonNull, rc::Rc,
sync::Mutex,
};
use std::{collections::HashMap, env, error::Error, fs, path::PathBuf, rc::Rc, sync::Mutex};
#[macro_use]
extern crate lazy_static;
extern crate cuda_types;
macro_rules! extern_redirect {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path);*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
$(
#[no_mangle]
#[allow(improper_ctypes_definitions)]
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
let original_fn = |dynamic_fns: &mut crate::side_by_side::CudaDynamicFns| {
dynamic_fns.$fn_name($( $arg_id ),*)
@ -34,10 +30,11 @@ macro_rules! extern_redirect {
}
macro_rules! extern_redirect_with_post {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path);*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
$(
#[no_mangle]
pub extern "system" fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
#[allow(improper_ctypes_definitions)]
pub extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
let original_fn = |dynamic_fns: &mut crate::side_by_side::CudaDynamicFns| {
dynamic_fns.$fn_name($( $arg_id ),*)
};
@ -60,10 +57,8 @@ macro_rules! extern_redirect_with_post {
use cuda_base::cuda_function_declarations;
cuda_function_declarations!(
cuda_types,
extern_redirect,
extern_redirect_with_post,
[
extern_redirect_with_post <= [
cuModuleLoad,
cuModuleLoadData,
cuModuleLoadDataEx,
@ -205,10 +200,10 @@ impl Settings {
}
};
let libcuda_path = match env::var("ZLUDA_CUDA_LIB") {
Err(env::VarError::NotPresent) => os::LIBCUDA_DEFAULT_PATH.to_owned(),
Err(env::VarError::NotPresent) => os::LIBCUDA_DEFAULT_PATH.to_string(),
Err(e) => {
logger.log(log::LogEntry::ErrorBox(Box::new(e) as _));
os::LIBCUDA_DEFAULT_PATH.to_owned()
os::LIBCUDA_DEFAULT_PATH.to_string()
}
Ok(env_string) => env_string,
};
@ -302,7 +297,7 @@ where
// alternatively we could return a CUDA error, but I think it's fine to
// crash. This is a diagnostic utility, if the lock was poisoned we can't
// extract any useful trace or logging anyway
let mut global_state = &mut *global_state_mutex.lock().unwrap();
let global_state = &mut *global_state_mutex.lock().unwrap();
let (mut logger, delayed_state) = match global_state.delayed_state {
LateInit::Success(ref mut delayed_state) => (
global_state.log_factory.get_logger(func, arguments_writer),
@ -325,7 +320,7 @@ where
logger.log(log::LogEntry::ErrorBox(
format!("No function {} in the underlying CUDA library", func).into(),
));
CUresult::CUDA_ERROR_UNKNOWN
CUresult::ERROR_UNKNOWN
}
};
logger.result = maybe_cu_result;
@ -359,7 +354,7 @@ pub(crate) fn cuModuleLoad_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
if result != CUresult::CUDA_SUCCESS {
if result.is_err() {
return;
}
state.record_new_module_file(unsafe { *module }, fname, fn_logger)
@ -373,7 +368,7 @@ pub(crate) fn cuModuleLoadData_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
if result != CUresult::CUDA_SUCCESS {
if result.is_err() {
return;
}
state.record_new_module(unsafe { *module }, raw_image, fn_logger)
@ -401,7 +396,7 @@ pub(crate) fn cuGetExportTable_Post(
state: &mut trace::StateTracker,
result: CUresult,
) {
if result != CUresult::CUDA_SUCCESS {
if result.is_err() {
return;
}
dark_api::override_export_table(ppExportTable, pExportTableId, state)
@ -451,7 +446,7 @@ pub(crate) fn cuModuleLoadFatBinary_Post(
_state: &mut trace::StateTracker,
result: CUresult,
) {
if result == CUresult::CUDA_SUCCESS {
if result.is_ok() {
panic!()
}
}

View File

@ -1,8 +1,8 @@
use crate::cuda::CUuuid;
use cuda_types::CUuuid;
use std::ffi::{c_void, CStr, CString};
use std::mem;
pub(crate) const LIBCUDA_DEFAULT_PATH: &'static str = b"/usr/lib/x86_64-linux-gnu/libcuda.so.1\0";
pub(crate) const LIBCUDA_DEFAULT_PATH: &str = "/usr/lib/x86_64-linux-gnu/libcuda.so.1";
pub unsafe fn load_library(libcuda_path: &str) -> *mut c_void {
let libcuda_path = CString::new(libcuda_path).unwrap();

View File

@ -56,8 +56,10 @@ impl CudaDynamicFns {
}
macro_rules! emit_cuda_fn_table {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path);*) => {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:path;)*) => {
#[derive(Default)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
struct CudaFnTable {
$($fn_name: DynamicFn<extern $abi fn ( $($arg_id : $arg_type),* ) -> $ret_type>),*
}
@ -74,4 +76,4 @@ macro_rules! emit_cuda_fn_table {
};
}
cuda_function_declarations!(cuda_types, emit_cuda_fn_table, emit_cuda_fn_table, []);
cuda_function_declarations!(emit_cuda_fn_table);

View File

@ -1,6 +1,3 @@
use ptx::{ast::PtxError, Token};
use ptx::{DisplayParseError, ModuleParserExt};
use crate::{dark_api, log, Settings};
use cuda_types::CUmodule;
use std::{
@ -172,7 +169,7 @@ impl StateTracker {
submodule_index: Option<usize>,
module_text: &str,
) {
let (_ast, errors) = ptx::ModuleParser::parse_unchecked(module_text);
let errors = ptx_parser::parse_for_errors(module_text);
if !errors.is_empty() {
fn_logger.log(log::LogEntry::ModuleParsingError(
DumpWriter::get_file_name(module_index, version, submodule_index, "log"),
@ -232,7 +229,7 @@ impl DumpWriter {
module_index: usize,
version: Option<usize>,
submodule_index: Option<usize>,
errors: &[ptx::ParseError<usize, Token<'input>, PtxError>],
errors: &[ptx_parser::PtxError<'input>],
) -> io::Result<()> {
let mut log_file = match &self.dump_dir {
None => return Ok(()),
@ -246,8 +243,7 @@ impl DumpWriter {
));
let mut file = File::create(log_file)?;
for error in errors {
let pretty_print_error = unsafe { DisplayParseError::new(error, module_text) };
writeln!(file, "{}", pretty_print_error)?;
writeln!(file, "{}", error)?;
}
Ok(())
}

View File

@ -1,12 +0,0 @@
[package]
name = "zluda_lib"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
[lib]
name = "nvcuda"
crate-type = ["cdylib"]
[dependencies]
zluda = { path = "../zluda" }

View File

@ -1 +0,0 @@
This project exist solely as a workaround, to make sure that ZLUDA-created CUDA driver does not clash with real CUDA driver when running unit tests

View File

@ -1,11 +0,0 @@
pub extern crate zluda;
pub use zluda::cuda::*;
// For some reason, on Linux linker strips out all our re-exports,
// there's probably a cleaner solution, but for now just exporting
// the function below stops it from doing so
#[no_mangle]
fn _zluda_very_bad_linker_hack() {
cuInit(0);
}

View File

@ -2,12 +2,8 @@
name = "zluda_ml"
version = "0.0.0"
authors = ["Andrzej Janik <vosen@vosen.pl>"]
edition = "2018"
edition = "2021"
[lib]
name = "nvml"
crate-type = ["cdylib"]
[dependencies.ocl-core]
version = "0.11"
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]

View File

@ -1,23 +1,5 @@
use std::{
os::raw::{c_char, c_uint},
ptr,
};
use crate::nvml::nvmlReturn_t;
const VERSION: &'static [u8] = b"418.40.04";
macro_rules! stringify_nmvlreturn_t {
($x:ident => [ $($variant:ident),+ ]) => {
match $x {
$(
nvmlReturn_t::$variant => Some(concat!(stringify!($variant), "\0")),
)+
_ => None
}
}
}
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> nvmlReturn_t {
unimplemented!()
@ -27,119 +9,3 @@ pub(crate) fn unimplemented() -> nvmlReturn_t {
pub(crate) fn unimplemented() -> nvmlReturn_t {
nvmlReturn_t::NVML_ERROR_NOT_SUPPORTED
}
pub(crate) fn error_string(result: nvmlReturn_t) -> *const ::std::os::raw::c_char {
let text = stringify_nmvlreturn_t!(
result => [
NVML_SUCCESS,
NVML_ERROR_UNINITIALIZED,
NVML_ERROR_INVALID_ARGUMENT,
NVML_ERROR_NOT_SUPPORTED,
NVML_ERROR_NO_PERMISSION,
NVML_ERROR_ALREADY_INITIALIZED,
NVML_ERROR_NOT_FOUND,
NVML_ERROR_INSUFFICIENT_SIZE,
NVML_ERROR_INSUFFICIENT_POWER,
NVML_ERROR_DRIVER_NOT_LOADED,
NVML_ERROR_TIMEOUT,
NVML_ERROR_IRQ_ISSUE,
NVML_ERROR_LIBRARY_NOT_FOUND,
NVML_ERROR_FUNCTION_NOT_FOUND,
NVML_ERROR_CORRUPTED_INFOROM,
NVML_ERROR_GPU_IS_LOST,
NVML_ERROR_RESET_REQUIRED,
NVML_ERROR_OPERATING_SYSTEM,
NVML_ERROR_LIB_RM_VERSION_MISMATCH,
NVML_ERROR_IN_USE,
NVML_ERROR_MEMORY,
NVML_ERROR_NO_DATA,
NVML_ERROR_VGPU_ECC_NOT_SUPPORTED,
NVML_ERROR_INSUFFICIENT_RESOURCES,
NVML_ERROR_UNKNOWN
]
);
match text {
Some(text) => text.as_ptr() as *const _,
None => ptr::null(),
}
}
pub(crate) fn shutdown() -> nvmlReturn_t {
nvmlReturn_t::NVML_SUCCESS
}
static mut DEVICE: Option<ocl_core::DeviceId> = None;
pub(crate) fn init() -> Result<(), nvmlReturn_t> {
let platforms = ocl_core::get_platform_ids()?;
let device = platforms.iter().find_map(|plat| {
let devices = ocl_core::get_device_ids(plat, Some(ocl_core::DeviceType::GPU), None).ok()?;
for dev in devices {
let vendor = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::VendorId).ok()?;
match vendor {
ocl_core::DeviceInfoResult::VendorId(0x8086)
| ocl_core::DeviceInfoResult::VendorId(0x1002) => {}
_ => continue,
};
let dev_type = ocl_core::get_device_info(dev, ocl_core::DeviceInfo::Type).ok()?;
if let ocl_core::DeviceInfoResult::Type(ocl_core::DeviceType::GPU) = dev_type {
return Some(dev);
}
}
None
});
unsafe { DEVICE = device };
if device.is_some() {
Ok(())
} else {
Err(nvmlReturn_t::NVML_ERROR_UNKNOWN)
}
}
pub(crate) fn init_with_flags() -> Result<(), nvmlReturn_t> {
init()
}
impl From<ocl_core::Error> for nvmlReturn_t {
fn from(_: ocl_core::Error) -> Self {
nvmlReturn_t::NVML_ERROR_UNKNOWN
}
}
impl From<Result<(), nvmlReturn_t>> for nvmlReturn_t {
fn from(result: Result<(), nvmlReturn_t>) -> Self {
match result {
Ok(()) => nvmlReturn_t::NVML_SUCCESS,
Err(e) => e,
}
}
}
struct CountingWriter<T: std::io::Write> {
pub base: T,
pub len: usize,
}
impl<T: std::io::Write> std::io::Write for CountingWriter<T> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.len += buf.len();
self.base.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.base.flush()
}
}
pub(crate) unsafe fn system_get_driver_version(
version_ptr: *mut c_char,
length: c_uint,
) -> Result<(), nvmlReturn_t> {
if version_ptr == ptr::null_mut() || length == 0 {
return Err(nvmlReturn_t::NVML_ERROR_INVALID_ARGUMENT);
}
let strlen = usize::min(VERSION.len(), (length as usize) - 1);
std::ptr::copy_nonoverlapping(VERSION.as_ptr(), version_ptr as _, strlen);
*version_ptr.add(strlen) = 0;
Ok(())
}

View File

@ -1131,27 +1131,27 @@ pub use self::nvmlPcieLinkState_enum as nvmlPcieLinkState_t;
#[no_mangle]
pub extern "C" fn nvmlInit_v2() -> nvmlReturn_t {
crate::r#impl::init().into()
crate::r#impl::unimplemented()
}
#[no_mangle]
pub extern "C" fn nvmlInit() -> nvmlReturn_t {
crate::r#impl::init().into()
crate::r#impl::unimplemented()
}
#[no_mangle]
pub extern "C" fn nvmlInitWithFlags(flags: ::std::os::raw::c_uint) -> nvmlReturn_t {
crate::r#impl::init_with_flags().into()
crate::r#impl::unimplemented()
}
#[no_mangle]
pub extern "C" fn nvmlShutdown() -> nvmlReturn_t {
crate::r#impl::shutdown()
crate::r#impl::unimplemented()
}
#[no_mangle]
pub extern "C" fn nvmlErrorString(result: nvmlReturn_t) -> *const ::std::os::raw::c_char {
crate::r#impl::error_string(result)
c"".as_ptr()
}
#[no_mangle]
@ -1159,7 +1159,7 @@ pub unsafe extern "C" fn nvmlSystemGetDriverVersion(
version: *mut ::std::os::raw::c_char,
length: ::std::os::raw::c_uint,
) -> nvmlReturn_t {
crate::r#impl::system_get_driver_version(version, length).into()
crate::r#impl::unimplemented()
}
#[no_mangle]