mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-08-02 14:57:43 +03:00
Tests for shf
This commit is contained in:
50
ptx/src/test/ll/shf_l.ll
Normal file
50
ptx/src/test/ll/shf_l.ll
Normal file
@@ -0,0 +1,50 @@
|
||||
; Kernel @shf_l: reads three consecutive i32 words through the pointer held in
; the first argument, combines them with llvm.fshl.i32 (result replaced by
; word 0 when the shift amount is >= 32), and writes the i32 result through
; the pointer held in the second argument.
define amdgpu_kernel void @shf_l(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
; Local slots in addrspace(5): two i64 (argument copies) and four i32
; (word 0, word 1, word 2, result).
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
; Copy both i64 kernel arguments from addrspace(4) into the local slots.
%"45" = load i64, ptr addrspace(4) %"37", align 8
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 8
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 8
|
||||
; Word 0: i32 at byte offset 0 of the first argument, treated as a pointer.
%"48" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"59", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
; Word 1: i32 at byte offset 4.
%"49" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"60" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"50" = load i32, ptr %"33", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
; Word 2: i32 at byte offset 8; used below as the shift amount.
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
|
||||
%"52" = load i32, ptr %"35", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
; Funnel shift left: word 1 is the high half, word 0 the low half, word 2 the
; shift amount (fshl itself interprets the amount modulo 32).
%2 = call i32 @llvm.fshl.i32(i32 %"55", i32 %"54", i32 %"56")
|
||||
; For an unsigned shift amount >= 32 the fshl value is discarded and word 0 is
; used as the result instead.
%3 = icmp uge i32 %"56", 32
|
||||
%"62" = select i1 %3, i32 %"54", i32 %2
|
||||
store i32 %"62", ptr addrspace(5) %"44", align 4
|
||||
; Store the result through the second argument, treated as a pointer.
%"57" = load i64, ptr addrspace(5) %"40", align 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store i32 %"58", ptr %"63", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.fshl.i32(i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
50
ptx/src/test/ll/shf_l_clamp.ll
Normal file
50
ptx/src/test/ll/shf_l_clamp.ll
Normal file
@@ -0,0 +1,50 @@
|
||||
; Kernel @shf_l_clamp: reads three consecutive i32 words through the pointer
; held in the first argument, combines them with llvm.fshl.i32 (result
; replaced by word 0 when the shift amount is >= 32), and writes the i32
; result through the pointer held in the second argument.
define amdgpu_kernel void @shf_l_clamp(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
; Local slots in addrspace(5): two i64 (argument copies) and four i32
; (word 0, word 1, word 2, result).
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
; Copy both i64 kernel arguments from addrspace(4) into the local slots.
%"45" = load i64, ptr addrspace(4) %"37", align 8
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 8
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 8
|
||||
; Word 0: i32 at byte offset 0 of the first argument, treated as a pointer.
%"48" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"59", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
; Word 1: i32 at byte offset 4.
%"49" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"60" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"50" = load i32, ptr %"33", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
; Word 2: i32 at byte offset 8; used below as the shift amount.
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
|
||||
%"52" = load i32, ptr %"35", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
; Funnel shift left: word 1 is the high half, word 0 the low half, word 2 the
; shift amount (fshl itself interprets the amount modulo 32).
%2 = call i32 @llvm.fshl.i32(i32 %"55", i32 %"54", i32 %"56")
|
||||
; Clamp handling: for an unsigned shift amount >= 32 the fshl value is
; discarded and word 0 is used as the result instead.
%3 = icmp uge i32 %"56", 32
|
||||
%"62" = select i1 %3, i32 %"54", i32 %2
|
||||
store i32 %"62", ptr addrspace(5) %"44", align 4
|
||||
; Store the result through the second argument, treated as a pointer.
%"57" = load i64, ptr addrspace(5) %"40", align 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store i32 %"58", ptr %"63", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.fshl.i32(i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
48
ptx/src/test/ll/shf_l_wrap.ll
Normal file
48
ptx/src/test/ll/shf_l_wrap.ll
Normal file
@@ -0,0 +1,48 @@
|
||||
; Kernel @shf_l_wrap: reads three consecutive i32 words through the pointer
; held in the first argument, combines them with llvm.fshl.i32, and writes the
; i32 result through the pointer held in the second argument.
define amdgpu_kernel void @shf_l_wrap(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
; Local slots in addrspace(5): two i64 (argument copies) and four i32
; (word 0, word 1, word 2, result).
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
; Copy both i64 kernel arguments from addrspace(4) into the local slots.
%"45" = load i64, ptr addrspace(4) %"37", align 8
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 8
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 8
|
||||
; Word 0: i32 at byte offset 0 of the first argument, treated as a pointer.
%"48" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"59", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
; Word 1: i32 at byte offset 4.
%"49" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"60" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"50" = load i32, ptr %"33", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
; Word 2: i32 at byte offset 8; used below as the shift amount.
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
|
||||
%"52" = load i32, ptr %"35", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
; Funnel shift left: word 1 is the high half, word 0 the low half, word 2 the
; shift amount. The fshl value is used directly; fshl interprets the amount
; modulo 32, so no extra handling of amounts >= 32 is emitted here.
%"62" = call i32 @llvm.fshl.i32(i32 %"55", i32 %"54", i32 %"56")
|
||||
store i32 %"62", ptr addrspace(5) %"44", align 4
|
||||
; Store the result through the second argument, treated as a pointer.
%"57" = load i64, ptr addrspace(5) %"40", align 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store i32 %"58", ptr %"63", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.fshl.i32(i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
50
ptx/src/test/ll/shf_r.ll
Normal file
50
ptx/src/test/ll/shf_r.ll
Normal file
@@ -0,0 +1,50 @@
|
||||
; Kernel @shf_r: reads three consecutive i32 words through the pointer held in
; the first argument, combines them with llvm.fshr.i32 (result replaced by
; word 1 when the shift amount is >= 32), and writes the i32 result through
; the pointer held in the second argument.
define amdgpu_kernel void @shf_r(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
; Local slots in addrspace(5): two i64 (argument copies) and four i32
; (word 0, word 1, word 2, result).
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
; Copy both i64 kernel arguments from addrspace(4) into the local slots.
%"45" = load i64, ptr addrspace(4) %"37", align 8
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 8
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 8
|
||||
; Word 0: i32 at byte offset 0 of the first argument, treated as a pointer.
%"48" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"59", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
; Word 1: i32 at byte offset 4.
%"49" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"60" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"50" = load i32, ptr %"33", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
; Word 2: i32 at byte offset 8; used below as the shift amount.
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
|
||||
%"52" = load i32, ptr %"35", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
; Funnel shift right: word 1 is the high half, word 0 the low half, word 2 the
; shift amount (fshr itself interprets the amount modulo 32).
%2 = call i32 @llvm.fshr.i32(i32 %"55", i32 %"54", i32 %"56")
|
||||
; For an unsigned shift amount >= 32 the fshr value is discarded and word 1 is
; used as the result instead.
%3 = icmp uge i32 %"56", 32
|
||||
%"62" = select i1 %3, i32 %"55", i32 %2
|
||||
store i32 %"62", ptr addrspace(5) %"44", align 4
|
||||
; Store the result through the second argument, treated as a pointer.
%"57" = load i64, ptr addrspace(5) %"40", align 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store i32 %"58", ptr %"63", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.fshr.i32(i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
50
ptx/src/test/ll/shf_r_clamp.ll
Normal file
50
ptx/src/test/ll/shf_r_clamp.ll
Normal file
@@ -0,0 +1,50 @@
|
||||
; Kernel @shf_r_clamp: reads three consecutive i32 words through the pointer
; held in the first argument, combines them with llvm.fshr.i32 (result
; replaced by word 1 when the shift amount is >= 32), and writes the i32
; result through the pointer held in the second argument.
define amdgpu_kernel void @shf_r_clamp(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
; Local slots in addrspace(5): two i64 (argument copies) and four i32
; (word 0, word 1, word 2, result).
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
; Copy both i64 kernel arguments from addrspace(4) into the local slots.
%"45" = load i64, ptr addrspace(4) %"37", align 8
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 8
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 8
|
||||
; Word 0: i32 at byte offset 0 of the first argument, treated as a pointer.
%"48" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"59", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
; Word 1: i32 at byte offset 4.
%"49" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"60" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"50" = load i32, ptr %"33", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
; Word 2: i32 at byte offset 8; used below as the shift amount.
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
|
||||
%"52" = load i32, ptr %"35", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
; Funnel shift right: word 1 is the high half, word 0 the low half, word 2 the
; shift amount (fshr itself interprets the amount modulo 32).
%2 = call i32 @llvm.fshr.i32(i32 %"55", i32 %"54", i32 %"56")
|
||||
; Clamp handling: for an unsigned shift amount >= 32 the fshr value is
; discarded and word 1 is used as the result instead.
%3 = icmp uge i32 %"56", 32
|
||||
%"62" = select i1 %3, i32 %"55", i32 %2
|
||||
store i32 %"62", ptr addrspace(5) %"44", align 4
|
||||
; Store the result through the second argument, treated as a pointer.
%"57" = load i64, ptr addrspace(5) %"40", align 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store i32 %"58", ptr %"63", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.fshr.i32(i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
48
ptx/src/test/ll/shf_r_wrap.ll
Normal file
48
ptx/src/test/ll/shf_r_wrap.ll
Normal file
@@ -0,0 +1,48 @@
|
||||
; Kernel @shf_r_wrap: reads three consecutive i32 words through the pointer
; held in the first argument, combines them with llvm.fshr.i32, and writes the
; i32 result through the pointer held in the second argument.
define amdgpu_kernel void @shf_r_wrap(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
; Local slots in addrspace(5): two i64 (argument copies) and four i32
; (word 0, word 1, word 2, result).
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
; Copy both i64 kernel arguments from addrspace(4) into the local slots.
%"45" = load i64, ptr addrspace(4) %"37", align 8
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 8
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 8
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 8
|
||||
; Word 0: i32 at byte offset 0 of the first argument, treated as a pointer.
%"48" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"59", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
; Word 1: i32 at byte offset 4.
%"49" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"60" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"50" = load i32, ptr %"33", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
; Word 2: i32 at byte offset 8; used below as the shift amount.
%"51" = load i64, ptr addrspace(5) %"39", align 8
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
|
||||
%"52" = load i32, ptr %"35", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
; Funnel shift right: word 1 is the high half, word 0 the low half, word 2 the
; shift amount. The fshr value is used directly; fshr interprets the amount
; modulo 32, so no extra handling of amounts >= 32 is emitted here.
%"62" = call i32 @llvm.fshr.i32(i32 %"55", i32 %"54", i32 %"56")
|
||||
store i32 %"62", ptr addrspace(5) %"44", align 4
|
||||
; Store the result through the second argument, treated as a pointer.
%"57" = load i64, ptr addrspace(5) %"40", align 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store i32 %"58", ptr %"63", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.fshr.i32(i32, i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
@@ -302,6 +302,12 @@ test_ptx!(tanh, [f32::INFINITY], [1.0f32]);
|
||||
test_ptx!(cp_async, [0u32], [1u32, 2u32, 3u32, 0u32]);
|
||||
|
||||
test_ptx!(nanosleep, [0u64], [0u64]);
|
||||
// Funnel-shift (shf) tests.
// NOTE(review): test_ptx! is defined outside this view; presumably the first
// list is the kernel's input buffer and the second the expected output — confirm.
// With the triple read as (a, b, c) = (0x12345678, 0x9abcdef0, shift):
// shift 12, left: (b << 12) | (a >> 20) = 0xcdef0123.
test_ptx!(shf_l, [0x12345678u32, 0x9abcdef0u32, 12], [0xcdef0123u32]);
|
||||
// shift 12, right: (b << 20) | (a >> 12) = 0xef012345.
test_ptx!(shf_r, [0x12345678u32, 0x9abcdef0u32, 12], [0xef012345u32]);
|
||||
// shift 44 (>= 32): the expected value equals a.
test_ptx!(shf_l_clamp, [0x12345678u32, 0x9abcdef0u32, 44], [0x12345678u32]);
|
||||
// shift 44 (>= 32): the expected value equals b.
test_ptx!(shf_r_clamp, [0x12345678u32, 0x9abcdef0u32, 44], [0x9abcdef0u32]);
|
||||
// shift 44: 44 & 31 = 12, so the expected value matches the shift-12 left case.
test_ptx!(shf_l_wrap, [0x12345678u32, 0x9abcdef0u32, 44], [0xcdef0123u32]);
|
||||
// shift 44: 44 & 31 = 12, so the expected value matches the shift-12 right case.
test_ptx!(shf_r_wrap, [0x12345678u32, 0x9abcdef0u32, 44], [0xef012345u32]);
|
||||
|
||||
test_ptx!(assertfail);
|
||||
// TODO: not yet supported
|
||||
|
28
ptx/src/test/spirv_run/shf_l.ptx
Normal file
28
ptx/src/test/spirv_run/shf_l.ptx
Normal file
@@ -0,0 +1,28 @@
|
||||
// Test kernel shf_l: loads three 32-bit words from the buffer at `input`,
// applies shf.l.clamp.b32 to them, and stores the 32-bit result at `output`.
.version 6.5
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry shf_l(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .b32 in_a;
|
||||
.reg .b32 in_b;
|
||||
.reg .b32 in_c;
|
||||
.reg .u32 result;
|
||||
|
||||
// Fetch the input and output addresses from the kernel parameters.
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
// in_a, in_b, in_c are the words at byte offsets 0, 4 and 8.
ld.b32 in_a, [in_addr];
|
||||
ld.b32 in_b, [in_addr+4];
|
||||
ld.b32 in_c, [in_addr+8];
|
||||
|
||||
// Funnel shift left, clamp mode: per the PTX ISA, n = min(in_c, 32) and
// result = (in_b << n) | (in_a >> (32 - n)).
shf.l.clamp.b32 result, in_a, in_b, in_c;
|
||||
|
||||
st.b32 [out_addr], result;
|
||||
ret;
|
||||
}
|
28
ptx/src/test/spirv_run/shf_l_clamp.ptx
Normal file
28
ptx/src/test/spirv_run/shf_l_clamp.ptx
Normal file
@@ -0,0 +1,28 @@
|
||||
// Test kernel shf_l_clamp: loads three 32-bit words from the buffer at
// `input`, applies shf.l.clamp.b32 to them, and stores the 32-bit result at
// `output`.
.version 6.5
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry shf_l_clamp(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .b32 in_a;
|
||||
.reg .b32 in_b;
|
||||
.reg .b32 in_c;
|
||||
.reg .u32 result;
|
||||
|
||||
// Fetch the input and output addresses from the kernel parameters.
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
// in_a, in_b, in_c are the words at byte offsets 0, 4 and 8.
ld.b32 in_a, [in_addr];
|
||||
ld.b32 in_b, [in_addr+4];
|
||||
ld.b32 in_c, [in_addr+8];
|
||||
|
||||
// Funnel shift left, clamp mode: per the PTX ISA, n = min(in_c, 32) and
// result = (in_b << n) | (in_a >> (32 - n)), so n = 32 yields in_a.
shf.l.clamp.b32 result, in_a, in_b, in_c;
|
||||
|
||||
st.b32 [out_addr], result;
|
||||
ret;
|
||||
}
|
28
ptx/src/test/spirv_run/shf_l_wrap.ptx
Normal file
28
ptx/src/test/spirv_run/shf_l_wrap.ptx
Normal file
@@ -0,0 +1,28 @@
|
||||
// Test kernel shf_l_wrap: loads three 32-bit words from the buffer at
// `input`, applies shf.l.wrap.b32 to them, and stores the 32-bit result at
// `output`.
.version 6.5
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry shf_l_wrap(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .b32 in_a;
|
||||
.reg .b32 in_b;
|
||||
.reg .b32 in_c;
|
||||
.reg .u32 result;
|
||||
|
||||
// Fetch the input and output addresses from the kernel parameters.
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
// in_a, in_b, in_c are the words at byte offsets 0, 4 and 8.
ld.b32 in_a, [in_addr];
|
||||
ld.b32 in_b, [in_addr+4];
|
||||
ld.b32 in_c, [in_addr+8];
|
||||
|
||||
// Funnel shift left, wrap mode: per the PTX ISA, n = in_c & 0x1f and
// result = (in_b << n) | (in_a >> (32 - n)).
shf.l.wrap.b32 result, in_a, in_b, in_c;
|
||||
|
||||
st.b32 [out_addr], result;
|
||||
ret;
|
||||
}
|
28
ptx/src/test/spirv_run/shf_r.ptx
Normal file
28
ptx/src/test/spirv_run/shf_r.ptx
Normal file
@@ -0,0 +1,28 @@
|
||||
// Test kernel shf_r: loads three 32-bit words from the buffer at `input`,
// applies shf.r.clamp.b32 to them, and stores the 32-bit result at `output`.
.version 6.5
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry shf_r(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .b32 in_a;
|
||||
.reg .b32 in_b;
|
||||
.reg .b32 in_c;
|
||||
.reg .u32 result;
|
||||
|
||||
// Fetch the input and output addresses from the kernel parameters.
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
// in_a, in_b, in_c are the words at byte offsets 0, 4 and 8.
ld.b32 in_a, [in_addr];
|
||||
ld.b32 in_b, [in_addr+4];
|
||||
ld.b32 in_c, [in_addr+8];
|
||||
|
||||
// Funnel shift right, clamp mode: per the PTX ISA, n = min(in_c, 32) and
// result = (in_b << (32 - n)) | (in_a >> n).
shf.r.clamp.b32 result, in_a, in_b, in_c;
|
||||
|
||||
st.b32 [out_addr], result;
|
||||
ret;
|
||||
}
|
28
ptx/src/test/spirv_run/shf_r_clamp.ptx
Normal file
28
ptx/src/test/spirv_run/shf_r_clamp.ptx
Normal file
@@ -0,0 +1,28 @@
|
||||
// Test kernel shf_r_clamp: loads three 32-bit words from the buffer at
// `input`, applies shf.r.clamp.b32 to them, and stores the 32-bit result at
// `output`.
.version 6.5
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry shf_r_clamp(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .b32 in_a;
|
||||
.reg .b32 in_b;
|
||||
.reg .b32 in_c;
|
||||
.reg .u32 result;
|
||||
|
||||
// Fetch the input and output addresses from the kernel parameters.
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
// in_a, in_b, in_c are the words at byte offsets 0, 4 and 8.
ld.b32 in_a, [in_addr];
|
||||
ld.b32 in_b, [in_addr+4];
|
||||
ld.b32 in_c, [in_addr+8];
|
||||
|
||||
// Funnel shift right, clamp mode: per the PTX ISA, n = min(in_c, 32) and
// result = (in_b << (32 - n)) | (in_a >> n), so n = 32 yields in_b.
shf.r.clamp.b32 result, in_a, in_b, in_c;
|
||||
|
||||
st.b32 [out_addr], result;
|
||||
ret;
|
||||
}
|
28
ptx/src/test/spirv_run/shf_r_wrap.ptx
Normal file
28
ptx/src/test/spirv_run/shf_r_wrap.ptx
Normal file
@@ -0,0 +1,28 @@
|
||||
// Test kernel shf_r_wrap: loads three 32-bit words from the buffer at
// `input`, applies shf.r.wrap.b32 to them, and stores the 32-bit result at
// `output`.
.version 6.5
|
||||
.target sm_32
|
||||
.address_size 64
|
||||
|
||||
.visible .entry shf_r_wrap(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .b32 in_a;
|
||||
.reg .b32 in_b;
|
||||
.reg .b32 in_c;
|
||||
.reg .u32 result;
|
||||
|
||||
// Fetch the input and output addresses from the kernel parameters.
ld.param.u64 in_addr, [input];
|
||||
ld.param.u64 out_addr, [output];
|
||||
|
||||
// in_a, in_b, in_c are the words at byte offsets 0, 4 and 8.
ld.b32 in_a, [in_addr];
|
||||
ld.b32 in_b, [in_addr+4];
|
||||
ld.b32 in_c, [in_addr+8];
|
||||
|
||||
// Funnel shift right, wrap mode: per the PTX ISA, n = in_c & 0x1f and
// result = (in_b << (32 - n)) | (in_a >> n).
shf.r.wrap.b32 result, in_a, in_b, in_c;
|
||||
|
||||
st.b32 [out_addr], result;
|
||||
ret;
|
||||
}
|
Reference in New Issue
Block a user