ZLUDA/ptx/src/ptx.lalrpop
2020-09-30 19:27:29 +02:00

1163 lines
35 KiB
Plaintext

use crate::ast;
use crate::ast::UnwrapWithVec;
use crate::{without_none, vector_index};
// Grammar header. `errors` is an extra parser argument; the semantic actions
// below hand it to `unwrap_with(errors)` whenever a literal fails to convert
// (presumably so the failure is recorded instead of aborting the parse —
// confirm against `ast::UnwrapWithVec`).
grammar<'a>(errors: &mut Vec<ast::PtxError>);
// User error type produced by fallible (`=>?`) actions.
extern {
type Error = ast::PtxError;
}
// Lexer specification. Each `match`/`else` level takes priority over the
// levels that follow it, so punctuation and dotted keywords win over the
// instruction mnemonics, which in turn win over the generic `ID`/`DotID`
// patterns at the bottom.
match {
    // Skipped input: whitespace, `//` line comments and `/* ... */` comments.
    r"\s+" => { },
    r"//[^\n\r]*[\n\r]*" => { },
    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { },
    // Integer literal: optional sign, optional `0x` prefix, decimal digits.
    // The prefix must be a group. The former character class `[?:0x]?`
    // accepted any single one of `?`, `:`, `0`, `x`, so an identifier such as
    // `x1` was lexed as `Num` (this level outranks `ID`) and `0x1` was split
    // into two tokens.
    // NOTE(review): hex digits a-f are still not accepted, and the actions
    // convert `Num` with `str::parse`, which does not understand `0x`.
    r"-?(?:0x)?[0-9]+" => Num,
    r#""[^"]*""# => String,
    r"[0-9]+\.[0-9]+" => VersionNumber,
    "!",
    "(", ")",
    "+",
    ",",
    ".",
    ":",
    ";",
    "@",
    "[", "]",
    "{", "}",
    "<", ">",
    "|",
    ".acquire",
    ".address_size",
    ".align",
    ".and",
    ".b16",
    ".b32",
    ".b64",
    ".b8",
    ".ca",
    ".cg",
    ".const",
    ".cs",
    ".cta",
    ".cv",
    ".entry",
    ".eq",
    ".equ",
    ".extern",
    ".f16",
    ".f16x2",
    ".f32",
    ".f64",
    ".file",
    ".ftz",
    ".func",
    ".ge",
    ".geu",
    ".global",
    ".gpu",
    ".gt",
    ".gtu",
    ".hi",
    ".hs",
    ".le",
    ".leu",
    ".lo",
    ".loc",
    ".local",
    ".ls",
    ".lt",
    ".ltu",
    ".lu",
    ".nan",
    ".ne",
    ".neu",
    ".num",
    ".or",
    ".param",
    ".pred",
    ".reg",
    ".relaxed",
    ".rm",
    ".rmi",
    ".rn",
    ".rni",
    ".rp",
    ".rpi",
    ".rz",
    ".rzi",
    ".s16",
    ".s32",
    ".s64",
    ".s8",
    ".sat",
    ".section",
    ".shared",
    ".sreg",
    ".sys",
    ".target",
    ".to",
    ".u16",
    ".u32",
    ".u64",
    ".u8",
    ".uni",
    ".v2",
    ".v4",
    ".version",
    ".visible",
    ".volatile",
    ".wb",
    ".weak",
    ".wide",
    ".wt",
    ".xor",
} else {
    // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
    "abs",
    "add",
    "bra",
    "call",
    "cvt",
    "cvta",
    "debug",
    "ld",
    "mad",
    "map_f64_to_f32",
    "mov",
    "mul",
    "not",
    "ret",
    "setp",
    "shl",
    "shr",
    r"sm_[0-9]+" => ShaderModel,
    "st",
    "texmode_independent",
    "texmode_unified",
} else {
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
    r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
    r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
}
// Identifier as the parser sees it: a plain `ID`, or any word the lexer
// reserves in its second priority level (mnemonics, target options,
// `ShaderModel`). Keep this list in sync with that level of the `match` block.
ExtendedID : &'input str = {
    // instruction mnemonics
    "abs",
    "add",
    "bra",
    "call",
    "cvt",
    "cvta",
    "debug",
    "ld",
    "mad",
    "map_f64_to_f32",
    "mov",
    "mul",
    "not",
    "ret",
    "setp",
    "shl",
    "shr",
    ShaderModel,
    "st",
    "texmode_independent",
    "texmode_unified",
    // ordinary identifier
    ID
}
// Entry point: a version line, a target line, then any number of directives.
// The directive results are passed through `without_none` before being stored.
pub Module: ast::Module<'input> = {
    <version:Version> Target <directives:Directive*> => {
        let functions = without_none(directives);
        ast::Module { version, functions }
    }
};
// `.version major.minor`, returned as a `(major, minor)` pair of `u8`.
Version: (u8, u8) = {
    ".version" <text:VersionNumber> => {
        // `VersionNumber` is lexed as `digits.digits`, so a dot is always there.
        let (major, rest) = text.split_at(text.find('.').unwrap());
        // `rest` still starts with the dot; skip it before converting.
        (major.parse::<u8>(), rest[1..].parse::<u8>()).unwrap_with(errors)
    }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
// `.target` followed by a comma-separated list of specifiers.
Target = {
    ".target" Comma<TargetSpecifier>
};
// One entry of the `.target` list: a shader model or one of the option words.
TargetSpecifier = {
    ShaderModel,
    "texmode_unified",
    "texmode_independent",
    "debug",
    "map_f64_to_f32"
};
// Module-level directive. Only functions produce a value; `.address_size`,
// `.file` and `.section` are parsed and yield `None`.
Directive: Option<ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>> = {
    AddressSize => None,
    <function:Function> => Some(function),
    File => None,
    Section => None
};
// `.address_size <number>`; the number is accepted but not interpreted here.
AddressSize = {
    ".address_size" Num
};
// A function or kernel: optional linking directives (ignored), the
// declaration, and then either a body or a bare `;`.
Function: ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>> = {
    LinkingDirective*
    <func_directive:MethodDecl>
    <body:FunctionBody> => ast::Function { func_directive, body }
};
// Linkage keywords that may precede a function declaration.
LinkingDirective = {
    ".extern",
    ".visible",
    ".weak"
};
// Declaration header: `.entry name(params)` for kernels, or
// `.func (returns)? name(params)` for functions. A missing return list
// becomes an empty vector.
MethodDecl: ast::MethodDecl<'input, &'input str> = {
    ".entry" <name:ExtendedID> <params:KernelArguments> => ast::MethodDecl::Kernel(name, params),
    ".func" <returns:FnArguments?> <name:ExtendedID> <params:FnArguments> => {
        let returns = returns.unwrap_or_default();
        ast::MethodDecl::Func(returns, name, params)
    }
};
// Parenthesised, comma-separated kernel parameter list.
KernelArguments: Vec<ast::KernelArgument<&'input str>> = {
    "(" <Comma<KernelInput>> ")"
};
// Parenthesised, comma-separated function argument (or return value) list.
FnArguments: Vec<ast::FnArgument<&'input str>> = {
    "(" <Comma<FnInput>> ")"
};
// A kernel parameter is always a `.param` variable.
KernelInput: ast::Variable<ast::VariableParamType, &'input str> = {
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: param.1,
        name: param.2
    }
}
// A function argument is either a `.reg` or a `.param` variable; the variable
// type is wrapped in the matching `FnArgumentType` variant.
FnInput: ast::Variable<ast::FnArgumentType, &'input str> = {
    <reg:RegVariable> => ast::Variable {
        align: reg.0,
        v_type: ast::FnArgumentType::Reg(reg.1),
        name: reg.2
    },
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: ast::FnArgumentType::Param(param.1),
        name: param.2
    }
}
// Either a braced statement list (`Some`) or a bare `;` (`None`).
pub(crate) FunctionBody: Option<Vec<ast::Statement<ast::ParsedArgParams<'input>>>> = {
    "{" <stmts:Statement*> "}" => Some(without_none(stmts)),
    ";" => None
};
// State-space keyword mapped to `ast::StateSpace`.
StateSpaceSpecifier: ast::StateSpace = {
    ".reg"    => ast::StateSpace::Reg,
    ".sreg"   => ast::StateSpace::Sreg,
    ".const"  => ast::StateSpace::Const,
    ".global" => ast::StateSpace::Global,
    ".local"  => ast::StateSpace::Local,
    ".shared" => ast::StateSpace::Shared,
    ".param"  => ast::StateSpace::Param, // used to prepare function call
};
// Any scalar type: the load/store types plus `.f16`, `.f16x2` and `.pred`.
ScalarType: ast::ScalarType = {
    ".f16"   => ast::ScalarType::F16,
    ".f16x2" => ast::ScalarType::F16x2,
    ".pred"  => ast::ScalarType::Pred,
    LdStScalarType
};
// Scalar types accepted by `ld`/`st` (see `LdStType`).
LdStScalarType: ast::ScalarType = {
    // untyped bits
    ".b8"  => ast::ScalarType::B8,
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    // unsigned integers
    ".u8"  => ast::ScalarType::U8,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    // signed integers
    ".s8"  => ast::ScalarType::S8,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    // floating point
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};
// One statement inside a function body. Debug directives are parsed but
// produce `None`; everything else becomes an `ast::Statement`.
Statement: Option<ast::Statement<ast::ParsedArgParams<'input>>> = {
    <label:Label> => Some(ast::Statement::Label(label)),
    DebugDirective => None,
    <decl:MultiVariable> ";" => Some(ast::Statement::Variable(decl)),
    <pred:PredAt?> <inst:Instruction> ";" => Some(ast::Statement::Instruction(pred, inst)),
    "{" <nested:Statement*> "}" => Some(ast::Statement::Block(without_none(nested)))
};
// Debug directives allowed between statements; currently only `.loc`.
DebugDirective: () = {
    DebugLocation
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc
// `.loc` followed by three numbers.
DebugLocation = {
    ".loc" Num Num Num
};
// `name:` — yields the label name without the colon.
Label: &'input str = {
    <ExtendedID> ":"
};
// `.align N`, with N converted to `u32`.
Align: u32 = {
    ".align" <digits:Num> => digits.parse::<u32>().unwrap_with(errors)
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
// A variable declaration with an optional `<N>` count suffix.
MultiVariable: ast::MultiVariable<&'input str> = {
    <var:Variable> <count:VariableParam?> => ast::MultiVariable { var, count }
}
// The `<N>` suffix of a parameterized variable name, as `u32`.
VariableParam: u32 = {
    "<" <digits:Num> ">" => digits.parse::<u32>().unwrap_with(errors)
}
// A variable declaration in `.reg`, `.local` or `.param` space. `.reg` and
// `.param` declarations arrive as tuples and are wrapped here;
// `LocalVariable` already builds the final value.
Variable: ast::Variable<ast::VariableType, &'input str> = {
    <reg:RegVariable> => ast::Variable {
        align: reg.0,
        v_type: ast::VariableType::Reg(reg.1),
        name: reg.2
    },
    LocalVariable,
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: ast::VariableType::Param(param.1),
        name: param.2
    },
};
// `.reg` declaration, scalar or vector, returned as `(align, type, name)`.
RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
    ".reg" <align:Align?> <scalar:ScalarType> <name:ExtendedID> => {
        (align, ast::VariableRegType::Scalar(scalar), name)
    },
    ".reg" <align:Align?> <width:VectorPrefix> <elem:SizedScalarType> <name:ExtendedID> => {
        (align, ast::VariableRegType::Vector(elem, width), name)
    }
}
// `.local` declaration: scalar, vector (`.v2`/`.v4`) or array (`name[N]`).
LocalVariable: ast::Variable<ast::VariableType, &'input str> = {
    ".local" <align:Align?> <elem:SizedScalarType> <name:ExtendedID> => ast::Variable {
        align,
        v_type: ast::VariableType::Local(ast::VariableLocalType::Scalar(elem)),
        name
    },
    ".local" <align:Align?> <width:VectorPrefix> <elem:SizedScalarType> <name:ExtendedID> => ast::Variable {
        align,
        v_type: ast::VariableType::Local(ast::VariableLocalType::Vector(elem, width)),
        name
    },
    ".local" <align:Align?> <elem:SizedScalarType> <name:ExtendedID> <len:ArraySpecifier> => ast::Variable {
        align,
        v_type: ast::VariableType::Local(ast::VariableLocalType::Array(elem, len)),
        name
    }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
// `.param` declaration, scalar or array, returned as `(align, type, name)`.
ParamVariable: (Option<u32>, ast::VariableParamType, &'input str) = {
    ".param" <align:Align?> <scalar:ParamScalarType> <name:ExtendedID> => {
        (align, ast::VariableParamType::Scalar(scalar), name)
    },
    ".param" <align:Align?> <elem:SizedScalarType> <name:ExtendedID> <len:ArraySpecifier> => {
        (align, ast::VariableParamType::Array(elem, len), name)
    }
}
// Scalar types usable as vector/array element types (no `.pred`).
#[inline]
SizedScalarType: ast::SizedScalarType = {
    // untyped bits
    ".b8"    => ast::SizedScalarType::B8,
    ".b16"   => ast::SizedScalarType::B16,
    ".b32"   => ast::SizedScalarType::B32,
    ".b64"   => ast::SizedScalarType::B64,
    // unsigned integers
    ".u8"    => ast::SizedScalarType::U8,
    ".u16"   => ast::SizedScalarType::U16,
    ".u32"   => ast::SizedScalarType::U32,
    ".u64"   => ast::SizedScalarType::U64,
    // signed integers
    ".s8"    => ast::SizedScalarType::S8,
    ".s16"   => ast::SizedScalarType::S16,
    ".s32"   => ast::SizedScalarType::S32,
    ".s64"   => ast::SizedScalarType::S64,
    // floating point
    ".f16"   => ast::SizedScalarType::F16,
    ".f16x2" => ast::SizedScalarType::F16x2,
    ".f32"   => ast::SizedScalarType::F32,
    ".f64"   => ast::SizedScalarType::F64,
}
// Scalar types accepted for scalar `.param` variables (no `.pred`, no `.f16x2`).
#[inline]
ParamScalarType: ast::ParamScalarType = {
    // untyped bits
    ".b8"  => ast::ParamScalarType::B8,
    ".b16" => ast::ParamScalarType::B16,
    ".b32" => ast::ParamScalarType::B32,
    ".b64" => ast::ParamScalarType::B64,
    // unsigned integers
    ".u8"  => ast::ParamScalarType::U8,
    ".u16" => ast::ParamScalarType::U16,
    ".u32" => ast::ParamScalarType::U32,
    ".u64" => ast::ParamScalarType::U64,
    // signed integers
    ".s8"  => ast::ParamScalarType::S8,
    ".s16" => ast::ParamScalarType::S16,
    ".s32" => ast::ParamScalarType::S32,
    ".s64" => ast::ParamScalarType::S64,
    // floating point
    ".f16" => ast::ParamScalarType::F16,
    ".f32" => ast::ParamScalarType::F32,
    ".f64" => ast::ParamScalarType::F64,
}
// `[N]` array length, as `u32`.
ArraySpecifier: u32 = {
    "[" <digits:Num> "]" => digits.parse::<u32>().unwrap_with(errors)
};
// Every instruction this grammar can parse; each alternative is defined in
// its own `Inst*` rule.
Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
    InstLd,
    InstMov,
    InstMul,
    InstAdd,
    InstSetp,
    InstNot,
    InstBra,
    InstCvt,
    InstShl,
    InstSt,
    InstRet,
    InstCvta,
    InstCall,
    InstAbs,
    InstMad
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
// `ld` with optional qualifier, state space and cache operator. Omitted
// modifiers default to weak / generic / cached.
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ld" <qualifier:LdStQualifier?> <space:LdStateSpace?> <cache:LdCacheOperator?> <typ:LdStType> <dst:IdOrVector> "," <src:MemoryOperand> => {
        let details = ast::LdDetails {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: space.unwrap_or(ast::LdStateSpace::Generic),
            caching: cache.unwrap_or(ast::LdCacheOperator::Cached),
            typ
        };
        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
    }
};
// Destination that is either a single name or a braced vector of names.
IdOrVector: ast::IdOrVector<&'input str> = {
    <name:ExtendedID> => ast::IdOrVector::Reg(name),
    <names:VectorExtract> => ast::IdOrVector::Vec(names)
}
// Source that is either an ordinary operand or a braced vector of names.
OperandOrVector: ast::OperandOrVector<&'input str> = {
    <operand:Operand> => ast::OperandOrVector::from(operand),
    <names:VectorExtract> => ast::OperandOrVector::Vec(names)
}
// Type suffix of `ld`/`st`: a scalar type, optionally preceded by `.v2`/`.v4`.
LdStType: ast::Type = {
    <width:VectorPrefix> <elem:LdStScalarType> => ast::Type::Vector(elem, width),
    <scalar:LdStScalarType> => ast::Type::Scalar(scalar),
}
// Memory-ordering qualifier shared by `ld` and `st`; `.relaxed` and
// `.acquire` must be followed by a scope.
LdStQualifier: ast::LdStQualifier = {
    ".weak" => ast::LdStQualifier::Weak,
    ".volatile" => ast::LdStQualifier::Volatile,
    ".relaxed" <scope:LdScope> => ast::LdStQualifier::Relaxed(scope),
    ".acquire" <scope:LdScope> => ast::LdStQualifier::Acquire(scope),
};
// Scope suffix that follows `.relaxed` / `.acquire`.
LdScope: ast::LdScope = {
    ".cta" => ast::LdScope::Cta,
    ".gpu" => ast::LdScope::Gpu,
    ".sys" => ast::LdScope::Sys
};
// Explicit state spaces accepted by `ld`.
LdStateSpace: ast::LdStateSpace = {
    ".const"  => ast::LdStateSpace::Const,
    ".global" => ast::LdStateSpace::Global,
    ".local"  => ast::LdStateSpace::Local,
    ".param"  => ast::LdStateSpace::Param,
    ".shared" => ast::LdStateSpace::Shared,
};
// Cache-operator suffixes accepted by `ld`.
LdCacheOperator: ast::LdCacheOperator = {
    ".ca" => ast::LdCacheOperator::Cached,
    ".cg" => ast::LdCacheOperator::L2Only,
    ".cs" => ast::LdCacheOperator::Streaming,
    ".lu" => ast::LdCacheOperator::LastUse,
    ".cv" => ast::LdCacheOperator::Uncached,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
// `mov` in either of its parsed forms; both yield a `(details, args)` pair.
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
    <normal:MovNormal> => {
        let (details, args) = normal;
        ast::Instruction::Mov(details, args)
    },
    <member:MovVector> => {
        let (details, args) = member;
        ast::Instruction::Mov(details, args)
    },
};
// `mov` between whole values: scalar-to-scalar, or a `.v2`/`.v4` move where
// either side may be a braced vector.
MovNormal: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
    "mov" <scalar:MovScalarType> <dst:ExtendedID> "," <src:Operand> => {
        let details = ast::MovDetails::new(ast::Type::Scalar(scalar));
        let args = ast::Arg2MovNormal { dst: ast::IdOrVector::Reg(dst), src: src.into() };
        (details, ast::Arg2Mov::Normal(args))
    },
    "mov" <width:VectorPrefix> <elem:MovVectorType> <dst:IdOrVector> "," <src:OperandOrVector> => {
        let details = ast::MovDetails::new(ast::Type::Vector(elem, width));
        let args = ast::Arg2MovNormal { dst, src };
        (details, ast::Arg2Mov::Normal(args))
    }
}
// `mov` where at least one side is a vector member (`name.x` style operand).
MovVector: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
    "mov" <elem:MovVectorType> <args:Arg2MovMember> => {
        let details = ast::MovDetails::new(ast::Type::Scalar(elem.into()));
        (details, ast::Arg2Mov::Member(args))
    },
}
// Types accepted by scalar `mov` (includes `.pred`).
#[inline]
MovScalarType: ast::ScalarType = {
    // untyped bits
    ".b16"  => ast::ScalarType::B16,
    ".b32"  => ast::ScalarType::B32,
    ".b64"  => ast::ScalarType::B64,
    // unsigned integers
    ".u16"  => ast::ScalarType::U16,
    ".u32"  => ast::ScalarType::U32,
    ".u64"  => ast::ScalarType::U64,
    // signed integers
    ".s16"  => ast::ScalarType::S16,
    ".s32"  => ast::ScalarType::S32,
    ".s64"  => ast::ScalarType::S64,
    // floating point
    ".f32"  => ast::ScalarType::F32,
    ".f64"  => ast::ScalarType::F64,
    // predicate
    ".pred" => ast::ScalarType::Pred
};
// Element types accepted by vector and vector-member `mov` (no `.pred`).
#[inline]
MovVectorType: ast::ScalarType = {
    // untyped bits
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    // unsigned integers
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    // signed integers
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    // floating point
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
// `mul` with its mode suffixes followed by three operands.
InstMul: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mul" <mode:InstMulMode> <args:Arg3> => ast::Instruction::Mul(mode, args)
};
// Modifier/type suffixes of `mul` (also reused by `mad`). Integer forms need
// a `.hi`/`.lo`/`.wide` control; float forms take optional rounding, `.ftz`
// and `.sat` depending on the type. The half-precision forms only accept
// `.rn` as rounding modifier.
InstMulMode: ast::MulDetails = {
    <control:MulIntControl> <typ:IntType> => ast::MulDetails::Int(ast::MulIntDesc { typ, control }),
    <rounding:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F32,
        rounding,
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some()
    }),
    <rounding:RoundingModeFloat?> ".f64" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F64,
        rounding,
        flush_to_zero: false,
        saturate: false
    }),
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F16,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some()
    }),
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => ast::MulDetails::Float(ast::MulFloatDesc {
        typ: ast::FloatType::F16x2,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some()
    })
};
// Result-selection suffix of integer `mul`/`mad`.
MulIntControl: ast::MulIntControl = {
    ".hi"   => ast::MulIntControl::High,
    ".lo"   => ast::MulIntControl::Low,
    ".wide" => ast::MulIntControl::Wide
};
// Floating-point rounding suffixes (`.rn`, `.rz`, `.rm`, `.rp`).
#[inline]
RoundingModeFloat : ast::RoundingMode = {
    ".rn" => ast::RoundingMode::NearestEven,
    ".rz" => ast::RoundingMode::Zero,
    ".rm" => ast::RoundingMode::NegativeInf,
    ".rp" => ast::RoundingMode::PositiveInf,
};
// Integer rounding suffixes (`.rni`, `.rzi`, `.rmi`, `.rpi`); they map to the
// same `ast::RoundingMode` variants as their floating-point counterparts.
RoundingModeInt : ast::RoundingMode = {
    ".rni" => ast::RoundingMode::NearestEven,
    ".rzi" => ast::RoundingMode::Zero,
    ".rmi" => ast::RoundingMode::NegativeInf,
    ".rpi" => ast::RoundingMode::PositiveInf,
};
// 16/32/64-bit integer types used by integer `mul`, `mad` and `add`.
IntType : ast::IntType = {
    // unsigned
    ".u16" => ast::IntType::U16,
    ".u32" => ast::IntType::U32,
    ".u64" => ast::IntType::U64,
    // signed
    ".s16" => ast::IntType::S16,
    ".s32" => ast::IntType::S32,
    ".s64" => ast::IntType::S64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
// `add` with its mode suffixes followed by three operands.
InstAdd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "add" <mode:InstAddMode> <args:Arg3> => ast::Instruction::Add(mode, args)
};
// Modifier/type suffixes of `add`.
//  * integer: a bare integer type, or `.sat.s32` (the only saturating form)
//  * `.f32`: optional rounding, `.ftz`, `.sat`
//  * `.f64`: optional rounding only
//  * `.f16` / `.f16x2`: optional `.rn`, `.ftz`, `.sat`
InstAddMode: ast::AddDetails = {
    <t:IntType> => ast::AddDetails::Int(ast::AddIntDesc {
        typ: t,
        saturate: false,
    }),
    ".sat" ".s32" => ast::AddDetails::Int(ast::AddIntDesc {
        typ: ast::IntType::S32,
        saturate: true,
    }),
    <rn:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F32,
        rounding: rn,
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    }),
    <rn:RoundingModeFloat?> ".f64" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F64,
        rounding: rn,
        flush_to_zero: false,
        saturate: false,
    }),
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F16,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    }),
    // Previously `todo!()`, which made the parser panic on `add.f16x2`.
    // Build the descriptor the same way `InstMulMode` does for `.f16x2`.
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F16x2,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    })
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
// TODO: support f16 setp
// `setp` in its plain form and in the form that combines the comparison with
// a third predicate through a boolean operator.
InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "setp" <mode:SetpMode> <args:Arg4Setp> => ast::Instruction::Setp(mode, args),
    "setp" <mode:SetpBoolMode> <args:Arg5> => ast::Instruction::SetpBool(mode, args),
};
// Suffixes of plain `setp`: comparison operator, optional `.ftz`, type.
SetpMode: ast::SetpData = {
    <cmp_op:SetpCompareOp> <ftz:".ftz"?> <typ:SetpType> => ast::SetpData {
        typ,
        flush_to_zero: ftz.is_some(),
        cmp_op,
    }
};
// Suffixes of boolean `setp`: comparison operator, boolean operator,
// optional `.ftz`, type.
SetpBoolMode: ast::SetpBoolData = {
    <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <ftz:".ftz"?> <typ:SetpType> => ast::SetpBoolData {
        typ,
        flush_to_zero: ftz.is_some(),
        cmp_op,
        bool_op,
    }
};
// Comparison suffixes of `setp`. `.lo/.ls/.hi/.hs` produce the same variants
// as `.lt/.le/.gt/.ge`.
SetpCompareOp: ast::SetpCompareOp = {
    // ordinary comparisons
    ".eq"  => ast::SetpCompareOp::Eq,
    ".ne"  => ast::SetpCompareOp::NotEq,
    ".lt"  => ast::SetpCompareOp::Less,
    ".le"  => ast::SetpCompareOp::LessOrEq,
    ".gt"  => ast::SetpCompareOp::Greater,
    ".ge"  => ast::SetpCompareOp::GreaterOrEq,
    // lower / lower-or-same / higher / higher-or-same spellings
    ".lo"  => ast::SetpCompareOp::Less,
    ".ls"  => ast::SetpCompareOp::LessOrEq,
    ".hi"  => ast::SetpCompareOp::Greater,
    ".hs"  => ast::SetpCompareOp::GreaterOrEq,
    // `u`-suffixed comparisons, mapped to the `Nan*` variants
    ".equ" => ast::SetpCompareOp::NanEq,
    ".neu" => ast::SetpCompareOp::NanNotEq,
    ".ltu" => ast::SetpCompareOp::NanLess,
    ".leu" => ast::SetpCompareOp::NanLessOrEq,
    ".gtu" => ast::SetpCompareOp::NanGreater,
    ".geu" => ast::SetpCompareOp::NanGreaterOrEq,
    // NaN tests
    ".num" => ast::SetpCompareOp::IsNotNan,
    ".nan" => ast::SetpCompareOp::IsNan,
};
// Boolean operator suffix of the combining form of `setp`.
SetpBoolPostOp: ast::SetpBoolPostOp = {
    ".and" => ast::SetpBoolPostOp::And,
    ".or"  => ast::SetpBoolPostOp::Or,
    ".xor" => ast::SetpBoolPostOp::Xor,
};
// Operand types accepted by `setp`.
SetpType: ast::ScalarType = {
    // untyped bits
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    // unsigned integers
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    // signed integers
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    // floating point
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
// `not` with a type suffix and two operands.
InstNot: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "not" <typ:NotType> <args:Arg2> => ast::Instruction::Not(typ, args)
};
// Types accepted by `not`.
NotType: ast::NotType = {
    ".pred" => ast::NotType::Pred,
    ".b16"  => ast::NotType::B16,
    ".b32"  => ast::NotType::B32,
    ".b64"  => ast::NotType::B64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
// Guard in front of an instruction: `@p` or the negated `@!p`.
PredAt: ast::PredAt<&'input str> = {
    "@" <label:ExtendedID> => ast::PredAt { not: false, label },
    "@" "!" <label:ExtendedID> => ast::PredAt { not: true, label }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
// `bra` with an optional `.uni` suffix and a single target operand.
InstBra: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bra" <uni:".uni"?> <target:Arg1> => {
        let data = ast::BraData { uniform: uni.is_some() };
        ast::Instruction::Bra(data, target)
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
// `cvt` in all supported source/destination combinations. The first three
// alternatives cover conversions that involve an integer type and defer
// validation to the `*_checked` constructors; the remaining ones spell out
// every float-to-float pair together with the modifiers accepted for it.
InstCvt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    // integer <- integer
    "cvt" <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeInt> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked(
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // float <- integer (rounding modifier is mandatory)
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeFloat> <src_t:CvtTypeInt> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked(
                r,
                f.is_some(),
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // integer <- float (integer rounding modifier is mandatory)
    "cvt" <r:RoundingModeInt> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeFloat> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked(
                r,
                f.is_some(),
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // f16 <- f16
    "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f16" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F16
            }
        ), a)
    },
    // f32 <- f16
    "cvt" <f:".ftz"?> <s:".sat"?> ".f32" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F16
            }
        ), a)
    },
    // f64 <- f16
    "cvt" <s:".sat"?> ".f64" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F16
            }
        ), a)
    },
    // f16 <- f32
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f16" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F32
            }
        ), a)
    },
    // f32 <- f32
    // NOTE(review): the f16 <- f16 case above takes integer rounding
    // modifiers, while this one and f64 <- f64 take the float ones; confirm
    // against the PTX `cvt` documentation which set same-size conversions use.
    "cvt" <r:RoundingModeFloat?> <f:".ftz"?> <s:".sat"?> ".f32" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F32
            }
        ), a)
    },
    // f64 <- f32
    "cvt" <s:".sat"?> ".f64" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F32
            }
        ), a)
    },
    // f16 <- f64
    "cvt" <r:RoundingModeFloat> <s:".sat"?> ".f16" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F64
            }
        ), a)
    },
    // f32 <- f64
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f32" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                // Taken from the `.ftz` binding; this used to read the `.sat`
                // binding, so `.ftz` was ignored and `.sat` implied flushing.
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F64
            }
        ), a)
    },
    // f64 <- f64
    "cvt" <r:RoundingModeFloat?> <s:".sat"?> ".f64" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F64
            }
        ), a)
    },
};
// Integer types accepted by `cvt` (8 to 64 bits, signed and unsigned).
CvtTypeInt: ast::IntType = {
    // unsigned
    ".u8"  => ast::IntType::U8,
    ".u16" => ast::IntType::U16,
    ".u32" => ast::IntType::U32,
    ".u64" => ast::IntType::U64,
    // signed
    ".s8"  => ast::IntType::S8,
    ".s16" => ast::IntType::S16,
    ".s32" => ast::IntType::S32,
    ".s64" => ast::IntType::S64,
};
// Floating-point types accepted by the integer/float forms of `cvt`.
CvtTypeFloat: ast::FloatType = {
    ".f16" => ast::FloatType::F16,
    ".f32" => ast::FloatType::F32,
    ".f64" => ast::FloatType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
// `shl` with a bit-size type suffix and three operands.
InstShl: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "shl" <typ:ShlType> <args:Arg3> => ast::Instruction::Shl(typ, args)
};
// Types accepted by `shl`.
ShlType: ast::ShlType = {
    ".b16" => ast::ShlType::B16,
    ".b32" => ast::ShlType::B32,
    ".b64" => ast::ShlType::B64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
// Warning: NVIDIA documentation is incorrect, you can specify scope only once
// `st` with optional qualifier, state space and cache operator. Omitted
// modifiers default to weak / generic / write-back.
InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "st" <qualifier:LdStQualifier?> <space:StStateSpace?> <cache:StCacheOperator?> <typ:LdStType> <src1:MemoryOperand> "," <src2:OperandOrVector> => {
        let data = ast::StData {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: space.unwrap_or(ast::StStateSpace::Generic),
            caching: cache.unwrap_or(ast::StCacheOperator::Writeback),
            typ
        };
        ast::Instruction::St(data, ast::Arg2St { src1, src2 })
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors
// Address operand: an ordinary operand wrapped in square brackets.
MemoryOperand: ast::Operand<&'input str> = {
    "[" <Operand> "]"
}
// Explicit state spaces accepted by `st`.
StStateSpace: ast::StStateSpace = {
    ".global" => ast::StStateSpace::Global,
    ".local"  => ast::StStateSpace::Local,
    ".param"  => ast::StStateSpace::Param,
    ".shared" => ast::StStateSpace::Shared,
};
// Cache-operator suffixes accepted by `st`.
StCacheOperator: ast::StCacheOperator = {
    ".wb" => ast::StCacheOperator::Writeback,
    ".cg" => ast::StCacheOperator::L2Only,
    ".cs" => ast::StCacheOperator::Streaming,
    ".wt" => ast::StCacheOperator::Writethrough,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
// `ret` with an optional `.uni` suffix.
InstRet: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ret" <uni:".uni"?> => {
        let data = ast::RetData { uniform: uni.is_some() };
        ast::Instruction::Ret(data)
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta
// `cvta.<space>` records the named space in `to` and `Generic` in `from`;
// `cvta.to.<space>` records them the other way round.
InstCvta: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "cvta" <space:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
        let details = ast::CvtaDetails {
            to: space,
            from: ast::CvtaStateSpace::Generic,
            size
        };
        ast::Instruction::Cvta(details, args)
    },
    "cvta" ".to" <space:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
        let details = ast::CvtaDetails {
            to: ast::CvtaStateSpace::Generic,
            from: space,
            size
        };
        ast::Instruction::Cvta(details, args)
    }
}
// Named state spaces accepted by `cvta`.
CvtaStateSpace: ast::CvtaStateSpace = {
    ".const"  => ast::CvtaStateSpace::Const,
    ".global" => ast::CvtaStateSpace::Global,
    ".local"  => ast::CvtaStateSpace::Local,
    ".shared" => ast::CvtaStateSpace::Shared,
}
// Address size suffix of `cvta`.
CvtaSize: ast::CvtaSize = {
    ".u32" => ast::CvtaSize::U32,
    ".u64" => ast::CvtaSize::U64,
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call
// `call` with an optional `.uni` suffix; `ArgCall` supplies
// `(return params, function name, parameter list)`.
InstCall: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "call" <uni:".uni"?> <call:ArgCall> => {
        ast::Instruction::Call(ast::CallInst {
            uniform: uni.is_some(),
            ret_params: call.0,
            func: call.1,
            param_list: call.2
        })
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
// `abs` for signed integers and floats. `.ftz` is accepted for `.f32`,
// `.f16` and `.f16x2`; the `.f16x2` form is not implemented yet.
InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "abs" <typ:SignedIntType> <args:Arg2> => {
        let details = ast::AbsDetails { flush_to_zero: false, typ };
        ast::Instruction::Abs(details, args)
    },
    "abs" <ftz:".ftz"?> ".f32" <args:Arg2> => {
        let details = ast::AbsDetails { flush_to_zero: ftz.is_some(), typ: ast::ScalarType::F32 };
        ast::Instruction::Abs(details, args)
    },
    "abs" ".f64" <args:Arg2> => {
        let details = ast::AbsDetails { flush_to_zero: false, typ: ast::ScalarType::F64 };
        ast::Instruction::Abs(details, args)
    },
    "abs" <ftz:".ftz"?> ".f16" <args:Arg2> => {
        let details = ast::AbsDetails { flush_to_zero: ftz.is_some(), typ: ast::ScalarType::F16 };
        ast::Instruction::Abs(details, args)
    },
    "abs" <ftz:".ftz"?> ".f16x2" <args:Arg2> => {
        todo!()
    },
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
// `mad` reuses the `mul` mode suffixes and takes four operands. The
// `.hi.sat.s32` form is only a placeholder: it has no operands and no action.
InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mad" <mode:InstMulMode> <args:Arg4> => ast::Instruction::Mad(mode, args),
    "mad" ".hi" ".sat" ".s32" => todo!()
};
// Signed integer types accepted by `abs`.
SignedIntType: ast::ScalarType = {
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
};
// Instruction operand: a name, a name plus a numeric offset (`name+N`,
// converted to `i32`), or a numeric immediate (converted to `u32`).
Operand: ast::Operand<&'input str> = {
    <reg:ExtendedID> => ast::Operand::Reg(reg),
    <reg:ExtendedID> "+" <digits:Num> => {
        ast::Operand::RegOffset(reg, digits.parse::<i32>().unwrap_with(errors))
    },
    // TODO: start parsing whole constants sub-language:
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
    <digits:Num> => {
        ast::Operand::Imm(digits.parse::<u32>().unwrap_with(errors))
    }
};
// Operand inside a `call` parameter list: a name or a `u32` immediate.
CallOperand: ast::CallOperand<&'input str> = {
    <reg:ExtendedID> => ast::CallOperand::Reg(reg),
    <digits:Num> => {
        ast::CallOperand::Imm(digits.parse::<u32>().unwrap_with(errors))
    }
};
// Single-operand argument list (a name).
Arg1: ast::Arg1<ast::ParsedArgParams<'input>> = {
    <src:ExtendedID> => ast::Arg1 { src }
};
// `dst, src` argument list.
Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src:Operand> => ast::Arg2 { dst, src }
};
// Arguments of a vector-member `mov`: the member may be on the destination
// side, on the source side, or on both. When the destination is a member,
// the name of its vector is passed along as a separate field.
Arg2MovMember: ast::Arg2MovMember<ast::ParsedArgParams<'input>> = {
    <target:MemberOperand> "," <source:ExtendedID> => {
        let vector = target.0;
        ast::Arg2MovMember::Dst(target, vector, source)
    },
    <target:ExtendedID> "," <source:MemberOperand> => ast::Arg2MovMember::Src(target, source),
    <target:MemberOperand> "," <source:MemberOperand> => {
        let vector = target.0;
        ast::Arg2MovMember::Both(target, vector, source)
    },
};
// `vector.member`, returned as `(vector name, member index)`. The member can
// arrive as `"."` + identifier or as a single `DotID` token; in the latter
// case the leading dot is stripped before calling `vector_index`.
MemberOperand: (&'input str, u8) = {
    <vector:ExtendedID> "." <member:ExtendedID> =>? {
        Ok((vector, vector_index(member)?))
    },
    <vector:ExtendedID> <member:DotID> =>? {
        Ok((vector, vector_index(&member[1..])?))
    }
};
// Braced list of exactly two or exactly four names.
VectorExtract: Vec<&'input str> = {
    "{" <a:ExtendedID> "," <b:ExtendedID> "}" => vec![a, b],
    "{" <a:ExtendedID> "," <b:ExtendedID> "," <c:ExtendedID> "," <d:ExtendedID> "}" => vec![a, b, c, d],
};
// `dst, src1, src2` argument list.
Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3 { dst, src1, src2 }
};
// `dst, src1, src2, src3` argument list.
Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => {
        ast::Arg4 { dst, src1, src2, src3 }
    }
};
// `dst1[|dst2], src1, src2` argument list of plain `setp`.
Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => {
        ast::Arg4Setp { dst1, dst2, src1, src2 }
    }
};
// TODO: pass src3 negation somewhere
// `dst1[|dst2], src1, src2, [!]src3` argument list of boolean `setp`. The
// optional `!` is accepted but not recorded.
Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => {
        ast::Arg5 { dst1, dst2, src1, src2, src3 }
    }
};
// Arguments of `call`, as `(return params, function name, parameter list)`.
// Accepts the full form, the form without return params, and a bare name.
ArgCall: (Vec<&'input str>, &'input str, Vec<ast::CallOperand<&'input str>>) = {
    "(" <rets:Comma<ExtendedID>> ")" "," <callee:ExtendedID> "," "(" <params:Comma<CallOperand>> ")" => {
        (rets, callee, params)
    },
    <callee:ExtendedID> "," "(" <params:Comma<CallOperand>> ")" => {
        (Vec::new(), callee, params)
    },
    <callee:ExtendedID> => {
        (Vec::new(), callee, Vec::<ast::CallOperand<_>>::new())
    },
};
// `|name` — the second destination of `setp`.
OptionalDst: &'input str = {
    "|" <ExtendedID>
}
// Vector width prefix, returned as the element count.
VectorPrefix: u8 = {
    ".v2" => 2,
    ".v4" => 4
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
// `.file <number> "<string>"`, optionally followed by `, <number>, <number>`.
File = {
    ".file" Num String ("," Num "," Num)?
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
// `.section <.name> { ... }` containing any number of data lines.
Section = {
    ".section" DotID "{" SectionDwarfLines* "}"
};
// One line inside a `.section` body: a bit type followed by a list of
// numbers, or `.b32`/`.b64` followed by a label with an optional `+ offset`.
SectionDwarfLines: () = {
    BitType Comma<Num>,
    ".b32" SectionLabel,
    ".b64" SectionLabel,
    ".b32" SectionLabel "+" Num,
    ".b64" SectionLabel "+" Num,
};
// Label referenced from a `.section` line: a plain or dot-prefixed identifier.
SectionLabel = {
    ID,
    DotID
};
// Bit-sized type keywords used in `.section` data lines.
BitType = {
    ".b8",
    ".b16",
    ".b32",
    ".b64"
};
// Generic comma-separated list of `T`; empty lists and a trailing comma are
// both accepted.
Comma<T>: Vec<T> = {
    <head:(<T> ",")*> <last:T?> => {
        let mut items = head;
        // `Option<T>` yields zero or one element, so this appends `last`
        // only when it is present.
        items.extend(last);
        items
    }
};