mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 14:57:43 +03:00
Update fmax test
This commit is contained in:
@ -1,51 +1,45 @@
|
|||||||
define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"39" = alloca half, align 2, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"40" = alloca half, align 2, addrspace(5)
|
||||||
%"43" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca half, align 2, addrspace(5)
|
||||||
%"44" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca half, align 2, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
br label %"36"
|
br label %"34"
|
||||||
|
|
||||||
"36": ; preds = %1
|
"34": ; preds = %1
|
||||||
%"45" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"37", align 4
|
||||||
%"46" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"38", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"46" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
%"61" = inttoptr i64 %"48" to ptr
|
%"55" = inttoptr i64 %"46" to ptr
|
||||||
%"47" = load i32, ptr %"61", align 4
|
%"54" = load i16, ptr %"55", align 2
|
||||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
%"45" = bitcast i16 %"54" to half
|
||||||
%"49" = load i64, ptr addrspace(5) %"39", align 4
|
store half %"45", ptr addrspace(5) %"39", align 2
|
||||||
%"62" = inttoptr i64 %"49" to ptr
|
%"47" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"62", i64 4
|
%"56" = inttoptr i64 %"47" to ptr
|
||||||
%"50" = load i32, ptr %"33", align 4
|
%"33" = getelementptr inbounds i8, ptr %"56", i64 2
|
||||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
%"57" = load i16, ptr %"33", align 2
|
||||||
%"52" = load i32, ptr addrspace(5) %"41", align 4
|
%"48" = bitcast i16 %"57" to half
|
||||||
%"53" = load i32, ptr addrspace(5) %"42", align 4
|
store half %"48", ptr addrspace(5) %"40", align 2
|
||||||
%"51" = call i32 @llvm.sadd.sat.i32(i32 %"52", i32 %"53")
|
%"50" = load half, ptr addrspace(5) %"40", align 2
|
||||||
store i32 %"51", ptr addrspace(5) %"43", align 4
|
%"51" = load half, ptr addrspace(5) %"39", align 2
|
||||||
%"55" = load i32, ptr addrspace(5) %"41", align 4
|
%"49" = call half @llvm.maxnum.f16(half %"50", half %"51")
|
||||||
%"56" = load i32, ptr addrspace(5) %"42", align 4
|
store half %"49", ptr addrspace(5) %"41", align 2
|
||||||
%"54" = add i32 %"55", %"56"
|
%"52" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"54", ptr addrspace(5) %"44", align 4
|
%"53" = load half, ptr addrspace(5) %"41", align 2
|
||||||
%"57" = load i64, ptr addrspace(5) %"40", align 4
|
%"58" = inttoptr i64 %"52" to ptr
|
||||||
%"58" = load i32, ptr addrspace(5) %"43", align 4
|
%"59" = bitcast half %"53" to i16
|
||||||
%"63" = inttoptr i64 %"57" to ptr
|
store i16 %"59", ptr %"58", align 2
|
||||||
store i32 %"58", ptr %"63", align 4
|
|
||||||
%"59" = load i64, ptr addrspace(5) %"40", align 4
|
|
||||||
%"64" = inttoptr i64 %"59" to ptr
|
|
||||||
%"35" = getelementptr inbounds i8, ptr %"64", i64 4
|
|
||||||
%"60" = load i32, ptr addrspace(5) %"44", align 4
|
|
||||||
store i32 %"60", ptr %"35", align 4
|
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.sadd.sat.i32(i32, i32) #1
|
declare half @llvm.maxnum.f16(half, half) #1
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
@ -11,13 +11,15 @@
|
|||||||
.reg .u64 out_addr;
|
.reg .u64 out_addr;
|
||||||
.reg .f16 temp1;
|
.reg .f16 temp1;
|
||||||
.reg .f16 temp2;
|
.reg .f16 temp2;
|
||||||
|
.reg .f16 result1;
|
||||||
|
.reg .f16 result2;
|
||||||
|
|
||||||
ld.param.u64 in_addr, [input];
|
ld.param.u64 in_addr, [input];
|
||||||
ld.param.u64 out_addr, [output];
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
ld.b16 temp1, [in_addr];
|
ld.b16 temp1, [in_addr];
|
||||||
ld.b16 temp2, [in_addr+2];
|
ld.b16 temp2, [in_addr+2];
|
||||||
max.f16 temp1, temp1, temp2;
|
max.f16 result1, temp2, temp1;
|
||||||
st.b16 [out_addr], temp1;
|
st.b16 [out_addr], result1;
|
||||||
ret;
|
ret;
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,7 @@ use std::str;
|
|||||||
macro_rules! read_test_file {
|
macro_rules! read_test_file {
|
||||||
($file:expr) => {
|
($file:expr) => {
|
||||||
{
|
{
|
||||||
// CARGO_MANIFEST_DIR is the crate directory (ptx), but file! is relative to the workspace root (and therefore also includes ptx).
|
// CARGO_MANIFEST_DIR is the crate directory (ptx), but file! is relative to the workspace root (and therefore also includes ptx).
|
||||||
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
||||||
path.pop();
|
path.pop();
|
||||||
path.push(file!());
|
path.push(file!());
|
||||||
@ -175,7 +175,7 @@ test_ptx!(sin, [std::f32::consts::PI / 2f32], [1f32]);
|
|||||||
test_ptx!(cos, [std::f32::consts::PI], [-1f32]);
|
test_ptx!(cos, [std::f32::consts::PI], [-1f32]);
|
||||||
test_ptx!(lg2, [512f32], [9f32]);
|
test_ptx!(lg2, [512f32], [9f32]);
|
||||||
test_ptx!(ex2, [10f32], [1024f32]);
|
test_ptx!(ex2, [10f32], [1024f32]);
|
||||||
test_ptx!(fmax, [0u32, 0x8000u32], [0x8000u32]);
|
test_ptx!(fmax, [0u16, half::f16::NAN.to_bits()], [0u16]);
|
||||||
test_ptx!(cvt_rni, [9.5f32, 10.5f32], [10f32, 10f32]);
|
test_ptx!(cvt_rni, [9.5f32, 10.5f32], [10f32, 10f32]);
|
||||||
test_ptx!(cvt_rzi, [-13.8f32, 12.9f32], [-13f32, 12f32]);
|
test_ptx!(cvt_rzi, [-13.8f32, 12.9f32], [-13f32, 12f32]);
|
||||||
test_ptx!(cvt_s32_f32, [-13.8f32, 12.9f32], [-13i32, 13i32]);
|
test_ptx!(cvt_s32_f32, [-13.8f32, 12.9f32], [-13i32, 13i32]);
|
||||||
|
Reference in New Issue
Block a user