mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-24 18:38:53 +03:00
Minor fixes requried by geekbench
This commit is contained in:
@ -193,6 +193,7 @@ fn join(fn_: Vec<String>, find_module: bool) -> Punctuated<Ident, Token![::]> {
|
|||||||
"func" => &["function"],
|
"func" => &["function"],
|
||||||
"mem" => &["memory"],
|
"mem" => &["memory"],
|
||||||
"memcpy" => &["memory", "copy"],
|
"memcpy" => &["memory", "copy"],
|
||||||
|
"memset" => &["memory", "set"],
|
||||||
_ => return None,
|
_ => return None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
Binary file not shown.
@ -1,5 +1,6 @@
|
|||||||
// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`:
|
// Every time this file changes it must te rebuilt, you need `rocm-llvm-dev` and `llvm-17`
|
||||||
// /opt/rocm/llvm/bin/clang -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
|
// `fdenormal-fp-math=dynamic` is required to make functions eligible for inlining
|
||||||
|
// /opt/rocm/llvm/bin/clang -Xclang -fdenormal-fp-math=dynamic -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
@ -37,7 +38,7 @@ extern "C"
|
|||||||
return (uint32_t)__ockl_get_num_groups(member);
|
return (uint32_t)__ockl_get_num_groups(member);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
|
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __device__;
|
||||||
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
|
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||||
{
|
{
|
||||||
uint32_t pos = pos_32 & 0xFFU;
|
uint32_t pos = pos_32 & 0xFFU;
|
||||||
@ -65,7 +66,7 @@ extern "C"
|
|||||||
return (base >> pos) & ((1UL << len) - 1UL);
|
return (base >> pos) & ((1UL << len) - 1UL);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
|
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __device__;
|
||||||
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
|
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
|
||||||
{
|
{
|
||||||
uint32_t pos = pos_32 & 0xFFU;
|
uint32_t pos = pos_32 & 0xFFU;
|
||||||
@ -120,7 +121,7 @@ extern "C"
|
|||||||
return (base << (64U - pos - len)) >> (64U - len);
|
return (base << (64U - pos - len)) >> (64U - len);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __attribute__((device));
|
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __device__;
|
||||||
uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
|
uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||||
{
|
{
|
||||||
uint32_t pos = pos_32 & 0xFFU;
|
uint32_t pos = pos_32 & 0xFFU;
|
||||||
@ -148,4 +149,10 @@ extern "C"
|
|||||||
mask = ((1UL << len) - 1UL) << (pos);
|
mask = ((1UL << len) - 1UL) << (pos);
|
||||||
return (~mask & base) | (mask & (insert << pos));
|
return (~mask & base) | (mask & (insert << pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FUNC(bar_sync)(uint32_t)
|
||||||
|
{
|
||||||
|
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
|
||||||
|
__builtin_amdgcn_s_barrier();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -534,7 +534,6 @@ impl<'a> MethodEmitContext<'a> {
|
|||||||
ast::Instruction::Sqrt { data, arguments } => self.emit_sqrt(data, arguments),
|
ast::Instruction::Sqrt { data, arguments } => self.emit_sqrt(data, arguments),
|
||||||
ast::Instruction::Rsqrt { data, arguments } => self.emit_rsqrt(data, arguments),
|
ast::Instruction::Rsqrt { data, arguments } => self.emit_rsqrt(data, arguments),
|
||||||
ast::Instruction::Selp { data, arguments } => self.emit_selp(data, arguments),
|
ast::Instruction::Selp { data, arguments } => self.emit_selp(data, arguments),
|
||||||
ast::Instruction::Bar { .. } => todo!(),
|
|
||||||
ast::Instruction::Atom { data, arguments } => self.emit_atom(data, arguments),
|
ast::Instruction::Atom { data, arguments } => self.emit_atom(data, arguments),
|
||||||
ast::Instruction::AtomCas { data, arguments } => self.emit_atom_cas(data, arguments),
|
ast::Instruction::AtomCas { data, arguments } => self.emit_atom_cas(data, arguments),
|
||||||
ast::Instruction::Div { data, arguments } => self.emit_div(data, arguments),
|
ast::Instruction::Div { data, arguments } => self.emit_div(data, arguments),
|
||||||
@ -554,6 +553,7 @@ impl<'a> MethodEmitContext<'a> {
|
|||||||
ast::Instruction::Trap {} => todo!(),
|
ast::Instruction::Trap {} => todo!(),
|
||||||
// replaced by a function call
|
// replaced by a function call
|
||||||
ast::Instruction::Bfe { .. }
|
ast::Instruction::Bfe { .. }
|
||||||
|
| ast::Instruction::Bar { .. }
|
||||||
| ast::Instruction::Bfi { .. }
|
| ast::Instruction::Bfi { .. }
|
||||||
| ast::Instruction::Activemask { .. } => return Err(error_unreachable()),
|
| ast::Instruction::Activemask { .. } => return Err(error_unreachable()),
|
||||||
}
|
}
|
||||||
@ -1565,8 +1565,12 @@ impl<'a> MethodEmitContext<'a> {
|
|||||||
Some(LLVMBuildFPToUI),
|
Some(LLVMBuildFPToUI),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
ptx_parser::CvtMode::FPFromSigned(_) => todo!(),
|
ptx_parser::CvtMode::FPFromSigned(_) => {
|
||||||
ptx_parser::CvtMode::FPFromUnsigned(_) => todo!(),
|
return self.emit_cvt_int_to_float(data.to, arguments, LLVMBuildSIToFP)
|
||||||
|
}
|
||||||
|
ptx_parser::CvtMode::FPFromUnsigned(_) => {
|
||||||
|
return self.emit_cvt_int_to_float(data.to, arguments, LLVMBuildUIToFP)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let src = self.resolver.value(arguments.src)?;
|
let src = self.resolver.value(arguments.src)?;
|
||||||
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||||
@ -1721,6 +1725,25 @@ impl<'a> MethodEmitContext<'a> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn emit_cvt_int_to_float(
|
||||||
|
&mut self,
|
||||||
|
to: ptx_parser::ScalarType,
|
||||||
|
arguments: ptx_parser::CvtArgs<SpirvWord>,
|
||||||
|
llvm_func: unsafe extern "C" fn(
|
||||||
|
arg1: LLVMBuilderRef,
|
||||||
|
Val: LLVMValueRef,
|
||||||
|
DestTy: LLVMTypeRef,
|
||||||
|
Name: *const i8,
|
||||||
|
) -> LLVMValueRef,
|
||||||
|
) -> Result<(), TranslateError> {
|
||||||
|
let type_ = get_scalar_type(self.context, to);
|
||||||
|
let src = self.resolver.value(arguments.src)?;
|
||||||
|
self.resolver.with_result(arguments.dst, |dst| unsafe {
|
||||||
|
llvm_func(self.builder, src, type_, dst)
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn emit_rsqrt(
|
fn emit_rsqrt(
|
||||||
&mut self,
|
&mut self,
|
||||||
data: ptx_parser::TypeFtz,
|
data: ptx_parser::TypeFtz,
|
||||||
|
@ -104,6 +104,9 @@ fn run_instruction<'input>(
|
|||||||
let name = ["bfi_", scalar_to_ptx_name(data)].concat();
|
let name = ["bfi_", scalar_to_ptx_name(data)].concat();
|
||||||
to_call(resolver, fn_declarations, name.into(), i)?
|
to_call(resolver, fn_declarations, name.into(), i)?
|
||||||
}
|
}
|
||||||
|
i @ ptx_parser::Instruction::Bar { .. } => {
|
||||||
|
to_call(resolver, fn_declarations, "bar_sync".into(), i)?
|
||||||
|
}
|
||||||
i => i,
|
i => i,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use hip_runtime_sys::*;
|
use hip_runtime_sys::*;
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
pub(crate) fn alloc_v2(dptr: *mut hipDeviceptr_t, bytesize: usize) -> hipError_t {
|
||||||
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
unsafe { hipMalloc(dptr.cast(), bytesize) }?;
|
||||||
@ -33,3 +34,7 @@ pub(crate) fn get_address_range_v2(
|
|||||||
) -> hipError_t {
|
) -> hipError_t {
|
||||||
unsafe { hipMemGetAddressRange(pbase, psize, dptr) }
|
unsafe { hipMemGetAddressRange(pbase, psize, dptr) }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn set_d32_v2(dst: hipDeviceptr_t, ui: ::core::ffi::c_uint, n: usize) -> hipError_t {
|
||||||
|
unsafe { hipMemsetD32(dst, mem::transmute(ui), n) }
|
||||||
|
}
|
||||||
|
@ -72,6 +72,7 @@ cuda_base::cuda_function_declarations!(
|
|||||||
cuModuleUnload,
|
cuModuleUnload,
|
||||||
cuPointerGetAttribute,
|
cuPointerGetAttribute,
|
||||||
cuMemGetAddressRange_v2,
|
cuMemGetAddressRange_v2,
|
||||||
|
cuMemsetD32_v2,
|
||||||
],
|
],
|
||||||
implemented_in_function <= [
|
implemented_in_function <= [
|
||||||
cuLaunchKernel,
|
cuLaunchKernel,
|
||||||
|
@ -9,7 +9,7 @@ name = "zluda_with"
|
|||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
[target.'cfg(windows)'.dependencies]
|
||||||
winapi = { version = "0.3", features = ["jobapi2", "processthreadsapi", "synchapi", "winbase", "std"] }
|
winapi = { version = "0.3", features = ["jobapi2", "processthreadsapi", "synchapi", "winbase", "std", "processenv"] }
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
argh = "0.1"
|
argh = "0.1"
|
||||||
detours-sys = { path = "../detours-sys" }
|
detours-sys = { path = "../detours-sys" }
|
||||||
|
@ -7,6 +7,9 @@ use std::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
fn main() -> Result<(), VarError> {
|
fn main() -> Result<(), VarError> {
|
||||||
|
if std::env::var_os("CARGO_CFG_WINDOWS").is_none() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
println!("cargo:rerun-if-changed=build.rs");
|
println!("cargo:rerun-if-changed=build.rs");
|
||||||
if env::var("PROFILE")? != "debug" {
|
if env::var("PROFILE")? != "debug" {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#![crate_type = "bin"]
|
#![crate_type = "bin"]
|
||||||
|
|
||||||
#[link(name = "do_cuinit")]
|
#[link(name = "do_cuinit", kind = "raw-dylib")]
|
||||||
extern "system" {
|
extern "system" {
|
||||||
fn do_cuinit(flags: u32) -> u32;
|
fn do_cuinit(flags: u32) -> u32;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user