From efd91e270c8660a54549e5e843a872a79bf670c3 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Wed, 3 Mar 2021 21:22:31 +0100 Subject: [PATCH] Implement non-coherent loads and implicit sign-extending conversions --- ptx/src/ast.rs | 1 + ptx/src/ptx.lalrpop | 39 +++++++++++++-- ptx/src/test/spirv_run/add_non_coherent.ptx | 22 +++++++++ .../test/spirv_run/add_non_coherent.spvtxt | 47 +++++++++++++++++++ ptx/src/test/spirv_run/mod.rs | 2 + ptx/src/test/spirv_run/sign_extend.ptx | 20 ++++++++ ptx/src/test/spirv_run/sign_extend.spvtxt | 47 +++++++++++++++++++ ptx/src/translate.rs | 5 +- 8 files changed, 178 insertions(+), 5 deletions(-) create mode 100644 ptx/src/test/spirv_run/add_non_coherent.ptx create mode 100644 ptx/src/test/spirv_run/add_non_coherent.spvtxt create mode 100644 ptx/src/test/spirv_run/sign_extend.ptx create mode 100644 ptx/src/test/spirv_run/sign_extend.spvtxt diff --git a/ptx/src/ast.rs b/ptx/src/ast.rs index 22d378e..dc9d4cc 100644 --- a/ptx/src/ast.rs +++ b/ptx/src/ast.rs @@ -740,6 +740,7 @@ pub struct LdDetails { pub state_space: LdStateSpace, pub caching: LdCacheOperator, pub typ: LdStType, + pub non_coherent: bool, } sub_type! { diff --git a/ptx/src/ptx.lalrpop b/ptx/src/ptx.lalrpop index 631d5ad..6a0ca55 100644 --- a/ptx/src/ptx.lalrpop +++ b/ptx/src/ptx.lalrpop @@ -93,6 +93,7 @@ match { ".min", ".nan", ".NaN", + ".nc", ".ne", ".neu", ".num", @@ -750,13 +751,38 @@ Instruction: ast::Instruction> = { // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld InstLd: ast::Instruction> = { - "ld" "," => { + "ld" "," => { ast::Instruction::Ld( ast::LdDetails { qualifier: q.unwrap_or(ast::LdStQualifier::Weak), state_space: ss.unwrap_or(ast::LdStateSpace::Generic), caching: cop.unwrap_or(ast::LdCacheOperator::Cached), - typ: t + typ: t, + non_coherent: false + }, + ast::Arg2Ld { dst:dst, src:src } + ) + }, + "ld" ".global" "," => { + ast::Instruction::Ld( + ast::LdDetails { + qualifier: q.unwrap_or(ast::LdStQualifier::Weak), + state_space: ast::LdStateSpace::Global, + caching: cop.unwrap_or(ast::LdCacheOperator::Cached), + typ: t, + non_coherent: false + }, + ast::Arg2Ld { dst:dst, src:src } + ) + }, + "ld" ".global" ".nc" "," => { + ast::Instruction::Ld( + ast::LdDetails { + qualifier: ast::LdStQualifier::Weak, + state_space: ast::LdStateSpace::Global, + caching: cop.unwrap_or(ast::LdCacheOperator::Cached), + typ: t, + non_coherent: true }, ast::Arg2Ld { dst:dst, src:src } ) @@ -781,9 +807,8 @@ MemScope: ast::MemScope = { ".sys" => ast::MemScope::Sys }; -LdStateSpace: ast::LdStateSpace = { +LdNonGlobalStateSpace: ast::LdStateSpace = { ".const" => ast::LdStateSpace::Const, - ".global" => ast::LdStateSpace::Global, ".local" => ast::LdStateSpace::Local, ".param" => ast::LdStateSpace::Param, ".shared" => ast::LdStateSpace::Shared, @@ -797,6 +822,12 @@ LdCacheOperator: ast::LdCacheOperator = { ".cv" => ast::LdCacheOperator::Uncached, }; +LdNcCacheOperator: ast::LdCacheOperator = { + ".ca" => ast::LdCacheOperator::Cached, + ".cg" => ast::LdCacheOperator::L2Only, + ".cs" => ast::LdCacheOperator::Streaming, +}; + // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov InstMov: ast::Instruction> = { "mov" "," => { diff --git a/ptx/src/test/spirv_run/add_non_coherent.ptx b/ptx/src/test/spirv_run/add_non_coherent.ptx new file mode 100644 index 0000000..10c35a1 --- /dev/null +++ b/ptx/src/test/spirv_run/add_non_coherent.ptx @@ -0,0 +1,22 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry add_non_coherent( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .u64 temp; + .reg .u64 temp2; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.global.nc.u64 temp, [in_addr]; + add.u64 temp2, temp, 1; + st.global.u64 [out_addr], temp2; + ret; +} diff --git a/ptx/src/test/spirv_run/add_non_coherent.spvtxt b/ptx/src/test/spirv_run/add_non_coherent.spvtxt new file mode 100644 index 0000000..99da980 --- /dev/null +++ b/ptx/src/test/spirv_run/add_non_coherent.spvtxt @@ -0,0 +1,47 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %23 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "add_non_coherent" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %26 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong +%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong + %ulong_1 = OpConstant %ulong 1 + %1 = OpFunction %void None %26 + %8 = OpFunctionParameter %ulong + %9 = OpFunctionParameter %ulong + %21 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_ulong Function + %7 = OpVariable %_ptr_Function_ulong Function + OpStore %2 %8 + OpStore %3 %9 + %10 = OpLoad %ulong %2 Aligned 8 + OpStore %4 %10 + %11 = OpLoad %ulong %3 Aligned 8 + OpStore %5 %11 + %13 = OpLoad %ulong %4 + %19 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %13 + %12 = OpLoad %ulong %19 Aligned 8 + OpStore %6 %12 + %15 = OpLoad %ulong %6 + %14 = OpIAdd %ulong %15 %ulong_1 + OpStore %7 %14 + %16 = OpLoad %ulong %5 + %17 = OpLoad %ulong %7 + %20 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %16 + OpStore %20 %17 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index 4178e2f..c99de17 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -153,6 +153,8 @@ test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]); test_ptx!(assertfail, [716523871u64], [716523872u64]); test_ptx!(cvt_s64_s32, [-1i32], [-1i64]); test_ptx!(add_tuning, [2u64], [3u64]); +test_ptx!(add_non_coherent, [3u64], [4u64]); +test_ptx!(sign_extend, [-1i16], [-1i32]); struct DisplayError { err: T, diff --git a/ptx/src/test/spirv_run/sign_extend.ptx b/ptx/src/test/spirv_run/sign_extend.ptx new file mode 100644 index 0000000..d3af0d5 --- /dev/null +++ b/ptx/src/test/spirv_run/sign_extend.ptx @@ -0,0 +1,20 @@ +.version 6.5 +.target sm_30 +.address_size 64 + +.visible .entry sign_extend( + .param .u64 input, + .param .u64 output +) +{ + .reg .u64 in_addr; + .reg .u64 out_addr; + .reg .s32 temp; + + ld.param.u64 in_addr, [input]; + ld.param.u64 out_addr, [output]; + + ld.s16 temp, [in_addr]; + st.s32 [out_addr], temp; + ret; +} \ No newline at end of file diff --git a/ptx/src/test/spirv_run/sign_extend.spvtxt b/ptx/src/test/spirv_run/sign_extend.spvtxt new file mode 100644 index 0000000..5ceffed --- /dev/null +++ b/ptx/src/test/spirv_run/sign_extend.spvtxt @@ -0,0 +1,47 @@ + OpCapability GenericPointer + OpCapability Linkage + OpCapability Addresses + OpCapability Kernel + OpCapability Int8 + OpCapability Int16 + OpCapability Int64 + OpCapability Float16 + OpCapability Float64 + %20 = OpExtInstImport "OpenCL.std" + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "sign_extend" + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %23 = OpTypeFunction %void %ulong %ulong +%_ptr_Function_ulong = OpTypePointer Function %ulong + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %ushort = OpTypeInt 16 0 +%_ptr_Generic_ushort = OpTypePointer Generic %ushort +%_ptr_Generic_uint = OpTypePointer Generic %uint + %1 = OpFunction %void None %23 + %7 = OpFunctionParameter %ulong + %8 = OpFunctionParameter %ulong + %18 = OpLabel + %2 = OpVariable %_ptr_Function_ulong Function + %3 = OpVariable %_ptr_Function_ulong Function + %4 = OpVariable %_ptr_Function_ulong Function + %5 = OpVariable %_ptr_Function_ulong Function + %6 = OpVariable %_ptr_Function_uint Function + OpStore %2 %7 + OpStore %3 %8 + %9 = OpLoad %ulong %2 Aligned 8 + OpStore %4 %9 + %10 = OpLoad %ulong %3 Aligned 8 + OpStore %5 %10 + %12 = OpLoad %ulong %4 + %16 = OpConvertUToPtr %_ptr_Generic_ushort %12 + %15 = OpLoad %ushort %16 Aligned 2 + %11 = OpSConvert %uint %15 + OpStore %6 %11 + %13 = OpLoad %ulong %5 + %14 = OpLoad %uint %6 + %17 = OpConvertUToPtr %_ptr_Generic_uint %13 + OpStore %17 %14 Aligned 4 + OpReturn + OpFunctionEnd diff --git a/ptx/src/translate.rs b/ptx/src/translate.rs index da0cc07..7566be8 100644 --- a/ptx/src/translate.rs +++ b/ptx/src/translate.rs @@ -4285,7 +4285,10 @@ fn emit_implicit_conversion( } } } - (TypeKind::Scalar, TypeKind::Scalar, ConversionKind::SignExtend) => todo!(), + (TypeKind::Scalar, TypeKind::Scalar, ConversionKind::SignExtend) => { + let result_type = map.get_or_add(builder, SpirvType::from(cv.to.clone())); + builder.s_convert(result_type , Some(cv.dst), cv.src)?; + }, (TypeKind::Vector, TypeKind::Scalar, ConversionKind::Default) | (TypeKind::Scalar, TypeKind::Array, ConversionKind::Default) | (TypeKind::Array, TypeKind::Scalar, ConversionKind::Default) => {