mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-21 00:48:49 +03:00
Implement non-coherent loads and implicit sign-extending conversions
This commit is contained in:
@ -740,6 +740,7 @@ pub struct LdDetails {
|
|||||||
pub state_space: LdStateSpace,
|
pub state_space: LdStateSpace,
|
||||||
pub caching: LdCacheOperator,
|
pub caching: LdCacheOperator,
|
||||||
pub typ: LdStType,
|
pub typ: LdStType,
|
||||||
|
pub non_coherent: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
sub_type! {
|
sub_type! {
|
||||||
|
@ -93,6 +93,7 @@ match {
|
|||||||
".min",
|
".min",
|
||||||
".nan",
|
".nan",
|
||||||
".NaN",
|
".NaN",
|
||||||
|
".nc",
|
||||||
".ne",
|
".ne",
|
||||||
".neu",
|
".neu",
|
||||||
".num",
|
".num",
|
||||||
@ -750,13 +751,38 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
|||||||
|
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
||||||
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
"ld" <q:LdStQualifier?> <ss:LdStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
"ld" <q:LdStQualifier?> <ss:LdNonGlobalStateSpace?> <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||||
ast::Instruction::Ld(
|
ast::Instruction::Ld(
|
||||||
ast::LdDetails {
|
ast::LdDetails {
|
||||||
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
||||||
state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
|
state_space: ss.unwrap_or(ast::LdStateSpace::Generic),
|
||||||
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||||
typ: t
|
typ: t,
|
||||||
|
non_coherent: false
|
||||||
|
},
|
||||||
|
ast::Arg2Ld { dst:dst, src:src }
|
||||||
|
)
|
||||||
|
},
|
||||||
|
"ld" <q:LdStQualifier?> ".global" <cop:LdCacheOperator?> <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||||
|
ast::Instruction::Ld(
|
||||||
|
ast::LdDetails {
|
||||||
|
qualifier: q.unwrap_or(ast::LdStQualifier::Weak),
|
||||||
|
state_space: ast::LdStateSpace::Global,
|
||||||
|
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||||
|
typ: t,
|
||||||
|
non_coherent: false
|
||||||
|
},
|
||||||
|
ast::Arg2Ld { dst:dst, src:src }
|
||||||
|
)
|
||||||
|
},
|
||||||
|
"ld" ".global" <cop:LdNcCacheOperator?> ".nc" <t:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
|
||||||
|
ast::Instruction::Ld(
|
||||||
|
ast::LdDetails {
|
||||||
|
qualifier: ast::LdStQualifier::Weak,
|
||||||
|
state_space: ast::LdStateSpace::Global,
|
||||||
|
caching: cop.unwrap_or(ast::LdCacheOperator::Cached),
|
||||||
|
typ: t,
|
||||||
|
non_coherent: true
|
||||||
},
|
},
|
||||||
ast::Arg2Ld { dst:dst, src:src }
|
ast::Arg2Ld { dst:dst, src:src }
|
||||||
)
|
)
|
||||||
@ -781,9 +807,8 @@ MemScope: ast::MemScope = {
|
|||||||
".sys" => ast::MemScope::Sys
|
".sys" => ast::MemScope::Sys
|
||||||
};
|
};
|
||||||
|
|
||||||
LdStateSpace: ast::LdStateSpace = {
|
LdNonGlobalStateSpace: ast::LdStateSpace = {
|
||||||
".const" => ast::LdStateSpace::Const,
|
".const" => ast::LdStateSpace::Const,
|
||||||
".global" => ast::LdStateSpace::Global,
|
|
||||||
".local" => ast::LdStateSpace::Local,
|
".local" => ast::LdStateSpace::Local,
|
||||||
".param" => ast::LdStateSpace::Param,
|
".param" => ast::LdStateSpace::Param,
|
||||||
".shared" => ast::LdStateSpace::Shared,
|
".shared" => ast::LdStateSpace::Shared,
|
||||||
@ -797,6 +822,12 @@ LdCacheOperator: ast::LdCacheOperator = {
|
|||||||
".cv" => ast::LdCacheOperator::Uncached,
|
".cv" => ast::LdCacheOperator::Uncached,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
LdNcCacheOperator: ast::LdCacheOperator = {
|
||||||
|
".ca" => ast::LdCacheOperator::Cached,
|
||||||
|
".cg" => ast::LdCacheOperator::L2Only,
|
||||||
|
".cs" => ast::LdCacheOperator::Streaming,
|
||||||
|
};
|
||||||
|
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
|
||||||
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
"mov" <pref:VectorPrefix?> <t:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => {
|
"mov" <pref:VectorPrefix?> <t:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => {
|
||||||
|
22
ptx/src/test/spirv_run/add_non_coherent.ptx
Normal file
22
ptx/src/test/spirv_run/add_non_coherent.ptx
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry add_non_coherent(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u64 temp;
|
||||||
|
.reg .u64 temp2;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
ld.global.nc.u64 temp, [in_addr];
|
||||||
|
add.u64 temp2, temp, 1;
|
||||||
|
st.global.u64 [out_addr], temp2;
|
||||||
|
ret;
|
||||||
|
}
|
47
ptx/src/test/spirv_run/add_non_coherent.spvtxt
Normal file
47
ptx/src/test/spirv_run/add_non_coherent.spvtxt
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
%23 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "add_non_coherent"
|
||||||
|
%void = OpTypeVoid
|
||||||
|
%ulong = OpTypeInt 64 0
|
||||||
|
%26 = OpTypeFunction %void %ulong %ulong
|
||||||
|
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||||
|
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
|
||||||
|
%ulong_1 = OpConstant %ulong 1
|
||||||
|
%1 = OpFunction %void None %26
|
||||||
|
%8 = OpFunctionParameter %ulong
|
||||||
|
%9 = OpFunctionParameter %ulong
|
||||||
|
%21 = OpLabel
|
||||||
|
%2 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%3 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%4 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%5 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%6 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%7 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
OpStore %2 %8
|
||||||
|
OpStore %3 %9
|
||||||
|
%10 = OpLoad %ulong %2 Aligned 8
|
||||||
|
OpStore %4 %10
|
||||||
|
%11 = OpLoad %ulong %3 Aligned 8
|
||||||
|
OpStore %5 %11
|
||||||
|
%13 = OpLoad %ulong %4
|
||||||
|
%19 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %13
|
||||||
|
%12 = OpLoad %ulong %19 Aligned 8
|
||||||
|
OpStore %6 %12
|
||||||
|
%15 = OpLoad %ulong %6
|
||||||
|
%14 = OpIAdd %ulong %15 %ulong_1
|
||||||
|
OpStore %7 %14
|
||||||
|
%16 = OpLoad %ulong %5
|
||||||
|
%17 = OpLoad %ulong %7
|
||||||
|
%20 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %16
|
||||||
|
OpStore %20 %17 Aligned 8
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
@ -153,6 +153,8 @@ test_ptx!(shared_ptr_take_address, [97815231u64], [97815231u64]);
|
|||||||
test_ptx!(assertfail, [716523871u64], [716523872u64]);
|
test_ptx!(assertfail, [716523871u64], [716523872u64]);
|
||||||
test_ptx!(cvt_s64_s32, [-1i32], [-1i64]);
|
test_ptx!(cvt_s64_s32, [-1i32], [-1i64]);
|
||||||
test_ptx!(add_tuning, [2u64], [3u64]);
|
test_ptx!(add_tuning, [2u64], [3u64]);
|
||||||
|
test_ptx!(add_non_coherent, [3u64], [4u64]);
|
||||||
|
test_ptx!(sign_extend, [-1i16], [-1i32]);
|
||||||
|
|
||||||
struct DisplayError<T: Debug> {
|
struct DisplayError<T: Debug> {
|
||||||
err: T,
|
err: T,
|
||||||
|
20
ptx/src/test/spirv_run/sign_extend.ptx
Normal file
20
ptx/src/test/spirv_run/sign_extend.ptx
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry sign_extend(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .s32 temp;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
ld.s16 temp, [in_addr];
|
||||||
|
st.s32 [out_addr], temp;
|
||||||
|
ret;
|
||||||
|
}
|
47
ptx/src/test/spirv_run/sign_extend.spvtxt
Normal file
47
ptx/src/test/spirv_run/sign_extend.spvtxt
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
%20 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "sign_extend"
|
||||||
|
%void = OpTypeVoid
|
||||||
|
%ulong = OpTypeInt 64 0
|
||||||
|
%23 = OpTypeFunction %void %ulong %ulong
|
||||||
|
%_ptr_Function_ulong = OpTypePointer Function %ulong
|
||||||
|
%uint = OpTypeInt 32 0
|
||||||
|
%_ptr_Function_uint = OpTypePointer Function %uint
|
||||||
|
%ushort = OpTypeInt 16 0
|
||||||
|
%_ptr_Generic_ushort = OpTypePointer Generic %ushort
|
||||||
|
%_ptr_Generic_uint = OpTypePointer Generic %uint
|
||||||
|
%1 = OpFunction %void None %23
|
||||||
|
%7 = OpFunctionParameter %ulong
|
||||||
|
%8 = OpFunctionParameter %ulong
|
||||||
|
%18 = OpLabel
|
||||||
|
%2 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%3 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%4 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%5 = OpVariable %_ptr_Function_ulong Function
|
||||||
|
%6 = OpVariable %_ptr_Function_uint Function
|
||||||
|
OpStore %2 %7
|
||||||
|
OpStore %3 %8
|
||||||
|
%9 = OpLoad %ulong %2 Aligned 8
|
||||||
|
OpStore %4 %9
|
||||||
|
%10 = OpLoad %ulong %3 Aligned 8
|
||||||
|
OpStore %5 %10
|
||||||
|
%12 = OpLoad %ulong %4
|
||||||
|
%16 = OpConvertUToPtr %_ptr_Generic_ushort %12
|
||||||
|
%15 = OpLoad %ushort %16 Aligned 2
|
||||||
|
%11 = OpSConvert %uint %15
|
||||||
|
OpStore %6 %11
|
||||||
|
%13 = OpLoad %ulong %5
|
||||||
|
%14 = OpLoad %uint %6
|
||||||
|
%17 = OpConvertUToPtr %_ptr_Generic_uint %13
|
||||||
|
OpStore %17 %14 Aligned 4
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
@ -4285,7 +4285,10 @@ fn emit_implicit_conversion(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(TypeKind::Scalar, TypeKind::Scalar, ConversionKind::SignExtend) => todo!(),
|
(TypeKind::Scalar, TypeKind::Scalar, ConversionKind::SignExtend) => {
|
||||||
|
let result_type = map.get_or_add(builder, SpirvType::from(cv.to.clone()));
|
||||||
|
builder.s_convert(result_type , Some(cv.dst), cv.src)?;
|
||||||
|
},
|
||||||
(TypeKind::Vector, TypeKind::Scalar, ConversionKind::Default)
|
(TypeKind::Vector, TypeKind::Scalar, ConversionKind::Default)
|
||||||
| (TypeKind::Scalar, TypeKind::Array, ConversionKind::Default)
|
| (TypeKind::Scalar, TypeKind::Array, ConversionKind::Default)
|
||||||
| (TypeKind::Array, TypeKind::Scalar, ConversionKind::Default) => {
|
| (TypeKind::Array, TypeKind::Scalar, ConversionKind::Default) => {
|
||||||
|
Reference in New Issue
Block a user