mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-23 01:48:56 +03:00
Implement non-coherent loads and implicit sign-extending conversions
This commit is contained in:
@ -740,6 +740,7 @@ pub struct LdDetails {
|
||||
pub state_space: LdStateSpace,
|
||||
pub caching: LdCacheOperator,
|
||||
pub typ: LdStType,
|
||||
pub non_coherent: bool,
|
||||
}
|
||||
|
||||
sub_type! {
|
||||
|
@ -93,6 +93,7 @@ match {
|
||||
".min",
|
||||
".nan",
|
||||
".NaN",
|
||||
".nc",
|
||||
".ne",
|
||||
".neu",
|
||||
".num",
|
||||
@ -750,13 +751,38 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
||||
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||
"ld" <q:LdStQualifier?> <ss:LdNonGlobalStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||
ast::Instruction::Ld(
|
||||
ast::LdDetails {
|
||||
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
||||
state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
|
||||
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||
typ: t
|
||||
typ: t,
|
||||
non_coherent: false
|
||||
},
|
||||
ast::Arg2Ld { dst:dst, src:src }
|
||||
)
|
||||
},
|
||||
"ld" <q:LdStQualifier?> ".global" <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||
ast::Instruction::Ld(
|
||||
ast::LdDetails {
|
||||
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
||||
state_space: ast::LdStateSpace::Global,
|
||||
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||
typ: t,
|
||||
non_coherent: false
|
||||
},
|
||||
ast::Arg2Ld { dst:dst, src:src }
|
||||
)
|
||||
},
|
||||
"ld" ".global" <cop:LdNcCacheOperator?> ".nc" <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||
ast::Instruction::Ld(
|
||||
ast::LdDetails {
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
state_space: ast::LdStateSpace::Global,
|
||||
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||
typ: t,
|
||||
non_coherent: true
|
||||
},
|
||||
ast::Arg2Ld { dst:dst, src:src }
|
||||
)
|
||||
@ -781,9 +807,8 @@ MemScope: ast::MemScope = {
|
||||
".sys" => ast::MemScope::Sys
|
||||
};
|
||||
|
||||
LdStateSpace: ast::LdStateSpace = {
|
||||
LdNonGlobalStateSpace: ast::LdStateSpace = {
|
||||
".const" => ast::LdStateSpace::Const,
|
||||
".global" => ast::LdStateSpace::Global,
|
||||
".local" => ast::LdStateSpace::Local,
|
||||
".param" => ast::LdStateSpace::Param,
|
||||
".shared" => ast::LdStateSpace::Shared,
|
||||
@ -797,6 +822,12 @@ LdCacheOperator: ast::LdCacheOperator = {
|
||||
".cv" => ast::LdCacheOperator::Uncached,
|
||||
};
|
||||
|
||||
// Cache operators accepted by the `"ld" ".global" <cop> ".nc"` form of the load rule.
// Produces the same `ast::LdCacheOperator` values as `LdCacheOperator`, but only
// `.ca`, `.cg` and `.cs` are listed here (`.cv` is not accepted for `.nc` loads).
LdNcCacheOperator: ast::LdCacheOperator = {
|
||||
".ca" => ast::LdCacheOperator::Cached,
|
||||
".cg" => ast::LdCacheOperator::L2Only,
|
||||
".cs" => ast::LdCacheOperator::Streaming,
|
||||
};
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
|
||||
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||
"mov" <pref:VectorPrefix?> <t:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => {
|
||||
|
22
ptx/src/test/spirv_run/add_non_coherent.ptx
Normal file
22
ptx/src/test/spirv_run/add_non_coherent.ptx
Normal file
@ -0,0 +1,22 @@
|
||||
.version 6.5
|
||||
.target sm_30
|
||||
.address_size 64
|
||||
|
||||
// Test kernel for the `.nc` (non-coherent) qualifier on `ld.global`:
// reads one u64 through `input`, adds 1, and writes the result through `output`.
// The matching test case feeds 3u64 and expects 4u64.
.visible .entry add_non_coherent(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .u64 temp;
|
||||
.reg .u64 temp2;
|
||||
|
||||
ld.param.u64 in_addr, [input]; // fetch the source address from the kernel parameter
|
||||
ld.param.u64 out_addr, [output]; // fetch the destination address from the kernel parameter
|
||||
|
||||
ld.global.nc.u64 temp, [in_addr]; // the instruction under test: global load with the `.nc` qualifier
|
||||
add.u64 temp2, temp, 1;
|
||||
st.global.u64 [out_addr], temp2;
|
||||
ret;
|
||||
}
|
47
ptx/src/test/spirv_run/add_non_coherent.spvtxt
Normal file
47
ptx/src/test/spirv_run/add_non_coherent.spvtxt
Normal file
@ -0,0 +1,47 @@
|
||||
OpCapability GenericPointer
|
||||
OpCapability Linkage
|
||||
OpCapability Addresses
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpCapability Int16
|
||||
OpCapability Int64
|
||||
OpCapability Float16
|
||||
OpCapability Float64
|
||||
%23 = OpExtInstImport "OpenCL.std"
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "add_non_coherent"
|
||||
%void = OpTypeVoid
|
||||
%ulong = OpTypeInt 64 0
|
||||
%26 = OpTypeFunction %void %ulong %ulong
|
||||
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
|
||||
%ulong_1 = OpConstant %ulong 1
|
||||
%1 = OpFunction %void None %26
|
||||
%8 = OpFunctionParameter %ulong
|
||||
%9 = OpFunctionParameter %ulong
|
||||
%21 = OpLabel
|
||||
%2 = OpVariable %_ptr_Function_ulong Function
|
||||
%3 = OpVariable %_ptr_Function_ulong Function
|
||||
%4 = OpVariable %_ptr_Function_ulong Function
|
||||
%5 = OpVariable %_ptr_Function_ulong Function
|
||||
%6 = OpVariable %_ptr_Function_ulong Function
|
||||
%7 = OpVariable %_ptr_Function_ulong Function
|
||||
OpStore %2 %8
|
||||
OpStore %3 %9
|
||||
%10 = OpLoad %ulong %2 Aligned 8
|
||||
OpStore %4 %10
|
||||
%11 = OpLoad %ulong %3 Aligned 8
|
||||
OpStore %5 %11
|
||||
%13 = OpLoad %ulong %4
|
||||
%19 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %13
|
||||
%12 = OpLoad %ulong %19 Aligned 8
|
||||
OpStore %6 %12
|
||||
%15 = OpLoad %ulong %6
|
||||
%14 = OpIAdd %ulong %15 %ulong_1
|
||||
OpStore %7 %14
|
||||
%16 = OpLoad %ulong %5
|
||||
%17 = OpLoad %ulong %7
|
||||
%20 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %16
|
||||
OpStore %20 %17 Aligned 8
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -153,6 +153,8 @@ test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]);
|
||||
test_ptx!(assertfail, [716523871u64], [716523872u64]);
|
||||
test_ptx!(cvt_s64_s32, [-1i32], [-1i64]);
|
||||
test_ptx!(add_tuning, [2u64], [3u64]);
|
||||
test_ptx!(add_non_coherent, [3u64], [4u64]);
|
||||
test_ptx!(sign_extend, [-1i16], [-1i32]);
|
||||
|
||||
struct DisplayError<T: Debug> {
|
||||
err: T,
|
||||
|
20
ptx/src/test/spirv_run/sign_extend.ptx
Normal file
20
ptx/src/test/spirv_run/sign_extend.ptx
Normal file
@ -0,0 +1,20 @@
|
||||
.version 6.5
|
||||
.target sm_30
|
||||
.address_size 64
|
||||
|
||||
// Test kernel for implicit sign extension on load:
// a 16-bit signed value is loaded into a 32-bit signed register and stored back as 32 bits.
// The matching test case feeds -1i16 and expects -1i32, so the upper bits must be sign-filled.
.visible .entry sign_extend(
|
||||
.param .u64 input,
|
||||
.param .u64 output
|
||||
)
|
||||
{
|
||||
.reg .u64 in_addr;
|
||||
.reg .u64 out_addr;
|
||||
.reg .s32 temp; // destination is wider (32-bit) than the loaded type (16-bit)
|
||||
|
||||
ld.param.u64 in_addr, [input]; // fetch the source address from the kernel parameter
|
||||
ld.param.u64 out_addr, [output]; // fetch the destination address from the kernel parameter
|
||||
|
||||
ld.s16 temp, [in_addr]; // the instruction under test: no state space given; .s16 load into a .s32 register
|
||||
st.s32 [out_addr], temp;
|
||||
ret;
|
||||
}
|
47
ptx/src/test/spirv_run/sign_extend.spvtxt
Normal file
47
ptx/src/test/spirv_run/sign_extend.spvtxt
Normal file
@ -0,0 +1,47 @@
|
||||
OpCapability GenericPointer
|
||||
OpCapability Linkage
|
||||
OpCapability Addresses
|
||||
OpCapability Kernel
|
||||
OpCapability Int8
|
||||
OpCapability Int16
|
||||
OpCapability Int64
|
||||
OpCapability Float16
|
||||
OpCapability Float64
|
||||
%20 = OpExtInstImport "OpenCL.std"
|
||||
OpMemoryModel Physical64 OpenCL
|
||||
OpEntryPoint Kernel %1 "sign_extend"
|
||||
%void = OpTypeVoid
|
||||
%ulong = OpTypeInt 64 0
|
||||
%23 = OpTypeFunction %void %ulong %ulong
|
||||
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||
%uint = OpTypeInt 32 0
|
||||
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||
%ushort = OpTypeInt 16 0
|
||||
%_ptr_Generic_ushort = OpTypePointer Generic %ushort
|
||||
%_ptr_Generic_uint = OpTypePointer Generic %uint
|
||||
%1 = OpFunction %void None %23
|
||||
%7 = OpFunctionParameter %ulong
|
||||
%8 = OpFunctionParameter %ulong
|
||||
%18 = OpLabel
|
||||
%2 = OpVariable %_ptr_Function_ulong Function
|
||||
%3 = OpVariable %_ptr_Function_ulong Function
|
||||
%4 = OpVariable %_ptr_Function_ulong Function
|
||||
%5 = OpVariable %_ptr_Function_ulong Function
|
||||
%6 = OpVariable %_ptr_Function_uint Function
|
||||
OpStore %2 %7
|
||||
OpStore %3 %8
|
||||
%9 = OpLoad %ulong %2 Aligned 8
|
||||
OpStore %4 %9
|
||||
%10 = OpLoad %ulong %3 Aligned 8
|
||||
OpStore %5 %10
|
||||
%12 = OpLoad %ulong %4
|
||||
%16 = OpConvertUToPtr %_ptr_Generic_ushort %12
|
||||
%15 = OpLoad %ushort %16 Aligned 2
|
||||
%11 = OpSConvert %uint %15
|
||||
OpStore %6 %11
|
||||
%13 = OpLoad %ulong %5
|
||||
%14 = OpLoad %uint %6
|
||||
%17 = OpConvertUToPtr %_ptr_Generic_uint %13
|
||||
OpStore %17 %14 Aligned 4
|
||||
OpReturn
|
||||
OpFunctionEnd
|
@ -4285,7 +4285,10 @@ fn emit_implicit_conversion(
|
||||
}
|
||||
}
|
||||
}
|
||||
(TypeKind::Scalar, TypeKind::Scalar, ConversionKind::SignExtend) => todo!(),
|
||||
(TypeKind::Scalar, TypeKind::Scalar, ConversionKind::SignExtend) => {
|
||||
let result_type = map.get_or_add(builder, SpirvType::from(cv.to.clone()));
|
||||
builder.s_convert(result_type , Some(cv.dst), cv.src)?;
|
||||
},
|
||||
(TypeKind::Vector, TypeKind::Scalar, ConversionKind::Default)
|
||||
| (TypeKind::Scalar, TypeKind::Array, ConversionKind::Default)
|
||||
| (TypeKind::Array, TypeKind::Scalar, ConversionKind::Default) => {
|
||||
|
Reference in New Issue
Block a user