mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-07-25 13:16:23 +03:00
1163 lines
35 KiB
Plaintext
1163 lines
35 KiB
Plaintext
use crate::ast;
|
|
use crate::ast::UnwrapWithVec;
|
|
use crate::{without_none, vector_index};
|
|
|
|
grammar<'a>(errors: &mut Vec<ast::PtxError>);
|
|
|
|
extern {
|
|
type Error = ast::PtxError;
|
|
}
|
|
|
|
// Lexer definition. The groups separated by `else` are listed in decreasing
// precedence: when patterns from different groups match the same text, the
// pattern in the earlier group is chosen.
match {
    // Whitespace, line comments and block comments produce no token.
    r"\s+" => { },
    r"//[^\n\r]*[\n\r]*" => { },
    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { },
    // Optional sign, optional `0x` prefix, then decimal digits. The prefix is
    // a non-capturing group; it used to be spelled as the character class
    // `[?:0x]`, which made `x1`, `:5` or `?7` lex as Num and, because this
    // group outranks ID, prevented identifiers such as `x1` from being lexed.
    // NOTE(review): digits a-f are not accepted after `0x`, and the actions
    // below call the decimal `str::parse` on the token text — confirm how
    // hexadecimal literals are meant to be handled.
    r"-?(?:0x)?[0-9]+" => Num,
    r#""[^"]*""# => String,
    r"[0-9]+\.[0-9]+" => VersionNumber,
    // Punctuation.
    "!",
    "(", ")",
    "+",
    ",",
    ".",
    ":",
    ";",
    "@",
    "[", "]",
    "{", "}",
    "<", ">",
    "|",
    // Dot-prefixed directives, modifiers and type names.
    ".acquire",
    ".address_size",
    ".align",
    ".and",
    ".b16",
    ".b32",
    ".b64",
    ".b8",
    ".ca",
    ".cg",
    ".const",
    ".cs",
    ".cta",
    ".cv",
    ".entry",
    ".eq",
    ".equ",
    ".extern",
    ".f16",
    ".f16x2",
    ".f32",
    ".f64",
    ".file",
    ".ftz",
    ".func",
    ".ge",
    ".geu",
    ".global",
    ".gpu",
    ".gt",
    ".gtu",
    ".hi",
    ".hs",
    ".le",
    ".leu",
    ".lo",
    ".loc",
    ".local",
    ".ls",
    ".lt",
    ".ltu",
    ".lu",
    ".nan",
    ".ne",
    ".neu",
    ".num",
    ".or",
    ".param",
    ".pred",
    ".reg",
    ".relaxed",
    ".rm",
    ".rmi",
    ".rn",
    ".rni",
    ".rp",
    ".rpi",
    ".rz",
    ".rzi",
    ".s16",
    ".s32",
    ".s64",
    ".s8",
    ".sat",
    ".section",
    ".shared",
    ".sreg",
    ".sys",
    ".target",
    ".to",
    ".u16",
    ".u32",
    ".u64",
    ".u8",
    ".uni",
    ".v2",
    ".v4",
    ".version",
    ".visible",
    ".volatile",
    ".wb",
    ".weak",
    ".wide",
    ".wt",
    ".xor",
} else {
    // IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
    "abs",
    "add",
    "bra",
    "call",
    "cvt",
    "cvta",
    "debug",
    "ld",
    "mad",
    "map_f64_to_f32",
    "mov",
    "mul",
    "not",
    "ret",
    "setp",
    "shl",
    "shr",
    r"sm_[0-9]+" => ShaderModel,
    "st",
    "texmode_independent",
    "texmode_unified",
} else {
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
    r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
    r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
}
|
|
|
|
// An identifier in the wide sense: either a plain ID or any of the word
// tokens from the second lexer group, which would otherwise be unusable as
// names. Keep this list in sync with that lexer group.
ExtendedID : &'input str = {
    "abs",
    "add",
    "bra",
    "call",
    "cvt",
    "cvta",
    "debug",
    "ld",
    "mad",
    "map_f64_to_f32",
    "mov",
    "mul",
    "not",
    "ret",
    "setp",
    "shl",
    "shr",
    ShaderModel,
    "st",
    "texmode_independent",
    "texmode_unified",
    ID
}
|
|
|
|
// Grammar entry point: a version directive, a target directive and then any
// number of top-level directives. The directive results are passed through
// `without_none` and stored as the module's functions.
pub Module: ast::Module<'input> = {
    <version:Version> Target <directives:Directive*> => {
        let functions = without_none(directives);
        ast::Module { version, functions }
    }
};
|
|
|
|
// `.version <major>.<minor>`, with both components parsed as `u8`. Parse
// results are handed to `unwrap_with` together with the shared error list.
Version: (u8, u8) = {
    ".version" <text:VersionNumber> => {
        // The VersionNumber pattern always contains a '.', so `find` succeeds.
        let dot_at = text.find('.').unwrap();
        let (major_text, rest) = text.split_at(dot_at);
        // `rest` still starts with the '.', skip it.
        let parsed = (major_text.parse::<u8>(), rest[1..].parse::<u8>());
        parsed.unwrap_with(errors)
    }
}
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
|
|
// `.target` followed by a comma-separated list of target specifiers. The
// parsed value is not used by `Module`.
Target = {
    ".target" Comma<TargetSpecifier>
};
|
|
|
|
// One entry of a `.target` list: a shader model token or one of the
// recognised option words.
TargetSpecifier = {
    ShaderModel,
    "texmode_unified",
    "texmode_independent",
    "debug",
    "map_f64_to_f32"
};
|
|
|
|
// A top-level directive. Only functions produce a value; address size, file
// and section directives are recognised and yield `None`.
Directive: Option<ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>> = {
    <function:Function> => Some(function),
    AddressSize => None,
    File => None,
    Section => None
};
|
|
|
|
// `.address_size <number>`; `Directive` discards the parsed value.
AddressSize = {
    ".address_size" Num
};
|
|
|
|
// A function or kernel: optional linking directives (ignored), the
// declaration, and then either a body or a bare `;`.
Function: ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>> = {
    LinkingDirective* <func_directive:MethodDecl> <body:FunctionBody> => {
        ast::Function { func_directive, body }
    }
};
|
|
|
|
// Linkage keywords accepted in front of a function declaration.
LinkingDirective = {
    ".extern",
    ".visible",
    ".weak"
};
|
|
|
|
// Declaration header of a kernel (`.entry name (params)`) or of a function
// (`.func (rets)? name (params)`). A function without a return list gets an
// empty vector of return values.
MethodDecl: ast::MethodDecl<'input, &'input str> = {
    ".entry" <name:ExtendedID> <params:KernelArguments> => {
        ast::MethodDecl::Kernel(name, params)
    },
    ".func" <ret_vals:FnArguments?> <name:ExtendedID> <params:FnArguments> => {
        let ret_vals = ret_vals.unwrap_or_else(Vec::new);
        ast::MethodDecl::Func(ret_vals, name, params)
    }
};
|
|
|
|
// Parenthesised, comma-separated list of kernel parameters.
KernelArguments: Vec<ast::KernelArgument<&'input str>> = {
    "(" <Comma<KernelInput>> ")"
};
|
|
|
|
// Parenthesised, comma-separated list of function arguments; also used for
// the list of return values in `MethodDecl`.
FnArguments: Vec<ast::FnArgument<&'input str>> = {
    "(" <Comma<FnInput>> ")"
};
|
|
|
|
// A single kernel parameter; only `.param` variables are accepted here.
KernelInput: ast::Variable<ast::VariableParamType, &'input str> = {
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: param.1,
        name: param.2
    }
}
|
|
|
|
// A single function argument: a `.reg` or a `.param` variable, with the
// variable type wrapped in the matching `FnArgumentType` variant.
FnInput: ast::Variable<ast::FnArgumentType, &'input str> = {
    <reg:RegVariable> => ast::Variable {
        align: reg.0,
        v_type: ast::FnArgumentType::Reg(reg.1),
        name: reg.2
    },
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: ast::FnArgumentType::Param(param.1),
        name: param.2
    }
}
|
|
|
|
// Either a braced statement list (statements passed through `without_none`)
// or a lone `;`, which yields `None` (a declaration without a body).
pub(crate) FunctionBody: Option<Vec<ast::Statement<ast::ParsedArgParams<'input>>>> = {
    ";" => None,
    "{" <statements:Statement*> "}" => Some(without_none(statements))
};
|
|
|
|
// Maps a state space keyword to its `ast::StateSpace` variant.
StateSpaceSpecifier: ast::StateSpace = {
    ".reg"    => ast::StateSpace::Reg,
    ".sreg"   => ast::StateSpace::Sreg,
    ".const"  => ast::StateSpace::Const,
    ".global" => ast::StateSpace::Global,
    ".local"  => ast::StateSpace::Local,
    ".shared" => ast::StateSpace::Shared,
    // used to prepare function call
    ".param"  => ast::StateSpace::Param,
};
|
|
|
|
// Any scalar type: everything `LdStScalarType` accepts plus `.f16`,
// `.f16x2` and `.pred`.
ScalarType: ast::ScalarType = {
    LdStScalarType,
    ".f16"   => ast::ScalarType::F16,
    ".f16x2" => ast::ScalarType::F16x2,
    ".pred"  => ast::ScalarType::Pred
};
|
|
|
|
// Scalar types accepted by `ld` and `st` (see `LdStType`).
LdStScalarType: ast::ScalarType = {
    // untyped bits
    ".b8"  => ast::ScalarType::B8,
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    // unsigned integers
    ".u8"  => ast::ScalarType::U8,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    // signed integers
    ".s8"  => ast::ScalarType::S8,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    // floating point
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};
|
|
|
|
// One statement inside a function body: a label, a debug directive (yields
// `None`), a variable declaration, an optionally predicated instruction, or
// a nested block.
Statement: Option<ast::Statement<ast::ParsedArgParams<'input>>> = {
    <label:Label> => Some(ast::Statement::Label(label)),
    DebugDirective => None,
    <variable:MultiVariable> ";" => Some(ast::Statement::Variable(variable)),
    <pred:PredAt?> <inst:Instruction> ";" => Some(ast::Statement::Instruction(pred, inst)),
    "{" <inner:Statement*> "}" => {
        let inner = without_none(inner);
        Some(ast::Statement::Block(inner))
    }
};
|
|
|
|
// Debug directives allowed between statements; currently only `.loc`.
DebugDirective: () = {
    DebugLocation
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc
|
|
// `.loc` followed by three numbers.
DebugLocation = {
    ".loc" Num Num Num
};
|
|
|
|
// A label definition: an identifier followed by `:`. Yields the identifier.
Label: &'input str = {
    <ExtendedID> ":"
};
|
|
|
|
// `.align <number>`; the number is parsed as `u32` and the parse result is
// handed to `unwrap_with` with the shared error list.
Align: u32 = {
    ".align" <value:Num> => value.parse::<u32>().unwrap_with(errors)
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
|
|
// A variable declaration with an optional `<N>` suffix.
MultiVariable: ast::MultiVariable<&'input str> = {
    <var:Variable> <count:VariableParam?> => ast::MultiVariable { var, count }
}
|
|
|
|
// The `<N>` suffix of a parameterized variable name, parsed as `u32`.
VariableParam: u32 = {
    "<" <count:Num> ">" => count.parse::<u32>().unwrap_with(errors)
}
|
|
|
|
// A variable declaration in `.reg`, `.local` or `.param` space. Register and
// parameter declarations arrive as tuples and are wrapped here; local
// declarations are already complete `ast::Variable` values.
Variable: ast::Variable<ast::VariableType, &'input str> = {
    <reg:RegVariable> => ast::Variable {
        align: reg.0,
        v_type: ast::VariableType::Reg(reg.1),
        name: reg.2
    },
    LocalVariable,
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: ast::VariableType::Param(param.1),
        name: param.2
    },
};
|
|
|
|
// `.reg` declaration, scalar or vector. Yields (alignment, type, name).
RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
    ".reg" <align:Align?> <scalar:ScalarType> <name:ExtendedID> => {
        (align, ast::VariableRegType::Scalar(scalar), name)
    },
    ".reg" <align:Align?> <width:VectorPrefix> <scalar:SizedScalarType> <name:ExtendedID> => {
        (align, ast::VariableRegType::Vector(scalar, width), name)
    }
}
|
|
|
|
// `.local` declaration: a scalar, a vector (`.v2`/`.v4` prefix) or an array
// (`[N]` suffix after the name).
LocalVariable: ast::Variable<ast::VariableType, &'input str> = {
    ".local" <align:Align?> <scalar:SizedScalarType> <name:ExtendedID> => {
        let local_type = ast::VariableLocalType::Scalar(scalar);
        ast::Variable { align, v_type: ast::VariableType::Local(local_type), name }
    },
    ".local" <align:Align?> <width:VectorPrefix> <scalar:SizedScalarType> <name:ExtendedID> => {
        let local_type = ast::VariableLocalType::Vector(scalar, width);
        ast::Variable { align, v_type: ast::VariableType::Local(local_type), name }
    },
    ".local" <align:Align?> <scalar:SizedScalarType> <name:ExtendedID> <len:ArraySpecifier> => {
        let local_type = ast::VariableLocalType::Array(scalar, len);
        ast::Variable { align, v_type: ast::VariableType::Local(local_type), name }
    }
}
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
|
|
// `.param` declaration, scalar or array. Yields (alignment, type, name).
ParamVariable: (Option<u32>, ast::VariableParamType, &'input str) = {
    ".param" <align:Align?> <scalar:ParamScalarType> <name:ExtendedID> => {
        (align, ast::VariableParamType::Scalar(scalar), name)
    },
    ".param" <align:Align?> <elem:SizedScalarType> <name:ExtendedID> <len:ArraySpecifier> => {
        (align, ast::VariableParamType::Array(elem, len), name)
    }
}
|
|
|
|
// Scalar types with a definite size: every scalar type token except `.pred`.
#[inline]
SizedScalarType: ast::SizedScalarType = {
    // untyped bits
    ".b8"    => ast::SizedScalarType::B8,
    ".b16"   => ast::SizedScalarType::B16,
    ".b32"   => ast::SizedScalarType::B32,
    ".b64"   => ast::SizedScalarType::B64,
    // unsigned integers
    ".u8"    => ast::SizedScalarType::U8,
    ".u16"   => ast::SizedScalarType::U16,
    ".u32"   => ast::SizedScalarType::U32,
    ".u64"   => ast::SizedScalarType::U64,
    // signed integers
    ".s8"    => ast::SizedScalarType::S8,
    ".s16"   => ast::SizedScalarType::S16,
    ".s32"   => ast::SizedScalarType::S32,
    ".s64"   => ast::SizedScalarType::S64,
    // floating point
    ".f16"   => ast::SizedScalarType::F16,
    ".f16x2" => ast::SizedScalarType::F16x2,
    ".f32"   => ast::SizedScalarType::F32,
    ".f64"   => ast::SizedScalarType::F64,
}
|
|
|
|
// Scalar types accepted for a scalar `.param` variable. Unlike
// `SizedScalarType`, `.f16x2` is not in this list.
#[inline]
ParamScalarType: ast::ParamScalarType = {
    // untyped bits
    ".b8"  => ast::ParamScalarType::B8,
    ".b16" => ast::ParamScalarType::B16,
    ".b32" => ast::ParamScalarType::B32,
    ".b64" => ast::ParamScalarType::B64,
    // unsigned integers
    ".u8"  => ast::ParamScalarType::U8,
    ".u16" => ast::ParamScalarType::U16,
    ".u32" => ast::ParamScalarType::U32,
    ".u64" => ast::ParamScalarType::U64,
    // signed integers
    ".s8"  => ast::ParamScalarType::S8,
    ".s16" => ast::ParamScalarType::S16,
    ".s32" => ast::ParamScalarType::S32,
    ".s64" => ast::ParamScalarType::S64,
    // floating point
    ".f16" => ast::ParamScalarType::F16,
    ".f32" => ast::ParamScalarType::F32,
    ".f64" => ast::ParamScalarType::F64,
}
|
|
|
|
// `[N]` array length, parsed as `u32`.
ArraySpecifier: u32 = {
    "[" <len:Num> "]" => len.parse::<u32>().unwrap_with(errors)
};
|
|
|
|
// Every instruction this grammar can parse; each alternative is defined by
// its own `Inst*` nonterminal.
Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
    // data movement and conversion
    InstLd,
    InstSt,
    InstMov,
    InstCvt,
    InstCvta,
    // arithmetic
    InstAdd,
    InstMul,
    InstMad,
    InstAbs,
    // comparison, logic and shifts
    InstSetp,
    InstNot,
    InstShl,
    // control flow
    InstBra,
    InstCall,
    InstRet
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
|
// `ld{.qualifier}{.space}{.cache} type dst, [src]`. Omitted modifiers default
// to the weak qualifier, the generic state space and the cached operator.
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ld" <qualifier:LdStQualifier?> <space:LdStateSpace?> <cache:LdCacheOperator?> <typ:LdStType> <dst:IdOrVector> "," <src:MemoryOperand> => {
        let details = ast::LdDetails {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: space.unwrap_or(ast::LdStateSpace::Generic),
            caching: cache.unwrap_or(ast::LdCacheOperator::Cached),
            typ
        };
        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
    }
};
|
|
|
|
// A destination that is either a single identifier or a braced vector of
// identifiers.
IdOrVector: ast::IdOrVector<&'input str> = {
    <reg:ExtendedID> => ast::IdOrVector::Reg(reg),
    <regs:VectorExtract> => ast::IdOrVector::Vec(regs)
}
|
|
|
|
// A source that is either an ordinary operand (converted with `From`) or a
// braced vector of identifiers.
OperandOrVector: ast::OperandOrVector<&'input str> = {
    <operand:Operand> => ast::OperandOrVector::from(operand),
    <regs:VectorExtract> => ast::OperandOrVector::Vec(regs)
}
|
|
|
|
// Type operand of `ld`/`st`: a scalar type, optionally preceded by a vector
// prefix.
LdStType: ast::Type = {
    <scalar:LdStScalarType> => ast::Type::Scalar(scalar),
    <width:VectorPrefix> <scalar:LdStScalarType> => ast::Type::Vector(scalar, width),
}
|
|
|
|
// Memory-ordering qualifier of `ld`/`st`. `.relaxed` and `.acquire` must be
// followed by a scope.
LdStQualifier: ast::LdStQualifier = {
    ".weak"     => ast::LdStQualifier::Weak,
    ".volatile" => ast::LdStQualifier::Volatile,
    ".relaxed" <scope:LdScope> => ast::LdStQualifier::Relaxed(scope),
    ".acquire" <scope:LdScope> => ast::LdStQualifier::Acquire(scope),
};
|
|
|
|
// Scope that follows a `.relaxed` or `.acquire` qualifier.
LdScope: ast::LdScope = {
    ".cta" => ast::LdScope::Cta,
    ".gpu" => ast::LdScope::Gpu,
    ".sys" => ast::LdScope::Sys
};
|
|
|
|
// State spaces that may be named on `ld`.
LdStateSpace: ast::LdStateSpace = {
    ".const"  => ast::LdStateSpace::Const,
    ".global" => ast::LdStateSpace::Global,
    ".local"  => ast::LdStateSpace::Local,
    ".param"  => ast::LdStateSpace::Param,
    ".shared" => ast::LdStateSpace::Shared,
};
|
|
|
|
// Cache operators that may be named on `ld`.
LdCacheOperator: ast::LdCacheOperator = {
    ".ca" => ast::LdCacheOperator::Cached,
    ".cg" => ast::LdCacheOperator::L2Only,
    ".cs" => ast::LdCacheOperator::Streaming,
    ".lu" => ast::LdCacheOperator::LastUse,
    ".cv" => ast::LdCacheOperator::Uncached,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
|
|
// `mov` in either of its forms; both helpers yield a (details, arguments)
// pair that is unpacked into `Instruction::Mov`.
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
    <normal:MovNormal> => ast::Instruction::Mov(normal.0, normal.1),
    <member:MovVector> => ast::Instruction::Mov(member.0, member.1),
};
|
|
|
|
|
|
// `mov` between whole values: a scalar move into a single register, or a
// `.v2`/`.v4` move where either side may be a braced vector.
MovNormal: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
    "mov" <scalar:MovScalarType> <dst:ExtendedID> "," <src:Operand> => {
        let details = ast::MovDetails::new(ast::Type::Scalar(scalar));
        let args = ast::Arg2MovNormal { dst: ast::IdOrVector::Reg(dst), src: src.into() };
        (details, ast::Arg2Mov::Normal(args))
    },
    "mov" <width:VectorPrefix> <scalar:MovVectorType> <dst:IdOrVector> "," <src:OperandOrVector> => {
        let details = ast::MovDetails::new(ast::Type::Vector(scalar, width));
        let args = ast::Arg2MovNormal { dst, src };
        (details, ast::Arg2Mov::Normal(args))
    }
}
|
|
|
|
// `mov` where at least one side is a vector member (`reg.x` style operand).
// The instruction type is recorded as a scalar.
MovVector: (ast::MovDetails, ast::Arg2Mov<ast::ParsedArgParams<'input>>) = {
    "mov" <scalar:MovVectorType> <args:Arg2MovMember> => {
        let details = ast::MovDetails::new(ast::Type::Scalar(scalar.into()));
        (details, ast::Arg2Mov::Member(args))
    },
}
|
|
|
|
// Types accepted by the scalar form of `mov`: the 16/32/64-bit bit, integer
// and float types listed below, plus `.pred`.
#[inline]
MovScalarType: ast::ScalarType = {
    ".b16"  => ast::ScalarType::B16,
    ".b32"  => ast::ScalarType::B32,
    ".b64"  => ast::ScalarType::B64,
    ".u16"  => ast::ScalarType::U16,
    ".u32"  => ast::ScalarType::U32,
    ".u64"  => ast::ScalarType::U64,
    ".s16"  => ast::ScalarType::S16,
    ".s32"  => ast::ScalarType::S32,
    ".s64"  => ast::ScalarType::S64,
    ".f32"  => ast::ScalarType::F32,
    ".f64"  => ast::ScalarType::F64,
    ".pred" => ast::ScalarType::Pred
};
|
|
|
|
// Element types accepted by the vector and member forms of `mov`; the same
// list as `MovScalarType` without `.pred`.
#[inline]
MovVectorType: ast::ScalarType = {
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
|
|
// `mul<mode> dst, src1, src2`.
InstMul: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mul" <mode:InstMulMode> <args:Arg3> => ast::Instruction::Mul(mode, args)
};
|
|
|
|
// Modifier and type suffix shared by `mul` and `mad`. The integer form needs
// a `.hi`/`.lo`/`.wide` control; the float forms take an optional rounding
// mode, and all but `.f64` also take optional `.ftz` and `.sat`. The
// half-precision forms accept `.rn` as their only rounding mode.
InstMulMode: ast::MulDetails = {
    <control:MulIntControl> <typ:IntType> => {
        ast::MulDetails::Int(ast::MulIntDesc { typ, control })
    },
    <rounding:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => {
        ast::MulDetails::Float(ast::MulFloatDesc {
            typ: ast::FloatType::F32,
            rounding,
            flush_to_zero: ftz.is_some(),
            saturate: sat.is_some()
        })
    },
    <rounding:RoundingModeFloat?> ".f64" => {
        ast::MulDetails::Float(ast::MulFloatDesc {
            typ: ast::FloatType::F64,
            rounding,
            flush_to_zero: false,
            saturate: false
        })
    },
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => {
        ast::MulDetails::Float(ast::MulFloatDesc {
            typ: ast::FloatType::F16,
            rounding: rn.map(|_| ast::RoundingMode::NearestEven),
            flush_to_zero: ftz.is_some(),
            saturate: sat.is_some()
        })
    },
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => {
        ast::MulDetails::Float(ast::MulFloatDesc {
            typ: ast::FloatType::F16x2,
            rounding: rn.map(|_| ast::RoundingMode::NearestEven),
            flush_to_zero: ftz.is_some(),
            saturate: sat.is_some()
        })
    }
};
|
|
|
|
// Which part of an integer product is kept.
MulIntControl: ast::MulIntControl = {
    ".hi"   => ast::MulIntControl::High,
    ".lo"   => ast::MulIntControl::Low,
    ".wide" => ast::MulIntControl::Wide
};
|
|
|
|
// Floating-point rounding modifiers (`.rn`, `.rz`, `.rm`, `.rp`).
#[inline]
RoundingModeFloat : ast::RoundingMode = {
    ".rn" => ast::RoundingMode::NearestEven,
    ".rz" => ast::RoundingMode::Zero,
    ".rm" => ast::RoundingMode::NegativeInf,
    ".rp" => ast::RoundingMode::PositiveInf,
};
|
|
|
|
// Integer rounding modifiers (`.rni`, `.rzi`, `.rmi`, `.rpi`); they map to
// the same `ast::RoundingMode` variants as their float counterparts.
RoundingModeInt : ast::RoundingMode = {
    ".rni" => ast::RoundingMode::NearestEven,
    ".rzi" => ast::RoundingMode::Zero,
    ".rmi" => ast::RoundingMode::NegativeInf,
    ".rpi" => ast::RoundingMode::PositiveInf,
};
|
|
|
|
// 16-, 32- and 64-bit integer types, unsigned and signed.
IntType : ast::IntType = {
    ".u16" => ast::IntType::U16,
    ".u32" => ast::IntType::U32,
    ".u64" => ast::IntType::U64,
    ".s16" => ast::IntType::S16,
    ".s32" => ast::IntType::S32,
    ".s64" => ast::IntType::S64,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
|
|
// `add<mode> dst, src1, src2`.
InstAdd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "add" <mode:InstAddMode> <args:Arg3> => ast::Instruction::Add(mode, args)
};
|
|
|
|
// Modifier and type suffix of `add`.
//
// Integer adds take a bare integer type; `.sat` is accepted only together
// with `.s32`. Float adds take an optional rounding mode, and all but `.f64`
// also take optional `.ftz` and `.sat`. The half-precision forms accept
// `.rn` as their only rounding mode.
InstAddMode: ast::AddDetails = {
    <t:IntType> => ast::AddDetails::Int(ast::AddIntDesc {
        typ: t,
        saturate: false,
    }),
    ".sat" ".s32" => ast::AddDetails::Int(ast::AddIntDesc {
        typ: ast::IntType::S32,
        saturate: true,
    }),
    <rn:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F32,
        rounding: rn,
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    }),
    <rn:RoundingModeFloat?> ".f64" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F64,
        rounding: rn,
        flush_to_zero: false,
        saturate: false,
    }),
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F16,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    }),
    // Previously `todo!()`, which panicked the parser on valid input. Built
    // the same way as the `.f16` form above and as the `.f16x2` form of
    // `InstMulMode`.
    // NOTE(review): confirm that the passes consuming `AddFloatDesc` handle
    // `FloatType::F16x2`.
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => ast::AddDetails::Float(ast::AddFloatDesc {
        typ: ast::FloatType::F16x2,
        rounding: rn.map(|_| ast::RoundingMode::NearestEven),
        flush_to_zero: ftz.is_some(),
        saturate: sat.is_some(),
    })
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
|
|
// TODO: support f16 setp
|
|
// `setp` in its plain form and in the form that combines the comparison with
// a third predicate operand through a boolean operator.
InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "setp" <mode:SetpMode> <args:Arg4Setp> => ast::Instruction::Setp(mode, args),
    "setp" <mode:SetpBoolMode> <args:Arg5> => ast::Instruction::SetpBool(mode, args),
};
|
|
|
|
// Suffix of plain `setp`: comparison operator, optional `.ftz`, operand type.
SetpMode: ast::SetpData = {
    <cmp_op:SetpCompareOp> <ftz:".ftz"?> <typ:SetpType> => {
        let flush_to_zero = ftz.is_some();
        ast::SetpData { typ, flush_to_zero, cmp_op }
    }
};
|
|
|
|
// Suffix of `setp` with a boolean post-operation: comparison operator,
// boolean operator, optional `.ftz`, operand type.
SetpBoolMode: ast::SetpBoolData = {
    <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <ftz:".ftz"?> <typ:SetpType> => {
        let flush_to_zero = ftz.is_some();
        ast::SetpBoolData { typ, flush_to_zero, cmp_op, bool_op }
    }
};
|
|
|
|
// Comparison operators of `setp`. `.lo`/`.ls`/`.hi`/`.hs` map to the same
// variants as `.lt`/`.le`/`.gt`/`.ge`.
SetpCompareOp: ast::SetpCompareOp = {
    ".eq"  => ast::SetpCompareOp::Eq,
    ".ne"  => ast::SetpCompareOp::NotEq,
    ".lt"  => ast::SetpCompareOp::Less,
    ".le"  => ast::SetpCompareOp::LessOrEq,
    ".gt"  => ast::SetpCompareOp::Greater,
    ".ge"  => ast::SetpCompareOp::GreaterOrEq,
    ".lo"  => ast::SetpCompareOp::Less,
    ".ls"  => ast::SetpCompareOp::LessOrEq,
    ".hi"  => ast::SetpCompareOp::Greater,
    ".hs"  => ast::SetpCompareOp::GreaterOrEq,
    // `u`-suffixed forms map to the `Nan*` variants
    ".equ" => ast::SetpCompareOp::NanEq,
    ".neu" => ast::SetpCompareOp::NanNotEq,
    ".ltu" => ast::SetpCompareOp::NanLess,
    ".leu" => ast::SetpCompareOp::NanLessOrEq,
    ".gtu" => ast::SetpCompareOp::NanGreater,
    ".geu" => ast::SetpCompareOp::NanGreaterOrEq,
    // NaN tests
    ".num" => ast::SetpCompareOp::IsNotNan,
    ".nan" => ast::SetpCompareOp::IsNan,
};
|
|
|
|
// Boolean operator of the combining form of `setp`.
SetpBoolPostOp: ast::SetpBoolPostOp = {
    ".and" => ast::SetpBoolPostOp::And,
    ".or"  => ast::SetpBoolPostOp::Or,
    ".xor" => ast::SetpBoolPostOp::Xor,
};
|
|
|
|
// Operand types accepted by `setp` (no half-precision types, see the TODO
// above `InstSetp`).
SetpType: ast::ScalarType = {
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
|
|
// `not<type> dst, src`.
InstNot: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "not" <typ:NotType> <args:Arg2> => ast::Instruction::Not(typ, args)
};
|
|
|
|
// Types accepted by `not`.
NotType: ast::NotType = {
    ".pred" => ast::NotType::Pred,
    ".b16"  => ast::NotType::B16,
    ".b32"  => ast::NotType::B32,
    ".b64"  => ast::NotType::B64,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
|
|
// Instruction guard: `@p` or the negated `@!p`.
PredAt: ast::PredAt<&'input str> = {
    "@" <label:ExtendedID> => ast::PredAt { not: false, label },
    "@" "!" <label:ExtendedID> => ast::PredAt { not: true, label }
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
|
|
// `bra{.uni} target`.
InstBra: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bra" <uni:".uni"?> <target:Arg1> => {
        let details = ast::BraData { uniform: uni.is_some() };
        ast::Instruction::Bra(details, target)
    }
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
|
|
// `cvt` in all supported destination/source type combinations.
//
// The three combinations that involve an integer type delegate to the
// `*_checked` constructors of `ast::CvtDetails`, which receive the shared
// error list. The float-to-float combinations are spelled out one by one
// because the accepted modifiers differ per type pair: a rounding mode is
// mandatory for `.f16.f32`, `.f16.f64` and `.f32.f64`, optional when both
// types are the same, and not accepted for `.f32.f16`, `.f64.f16` and
// `.f64.f32`.
// NOTE(review): `.f16.f16` takes an integer rounding mode while `.f32.f32`
// and `.f64.f64` take a float rounding mode — confirm the difference is
// intended.
InstCvt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    // integer <- integer
    "cvt" <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeInt> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked(
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // float <- integer
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeFloat> <src_t:CvtTypeInt> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked(
                r,
                f.is_some(),
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // integer <- float
    "cvt" <r:RoundingModeInt> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeFloat> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked(
                r,
                f.is_some(),
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // f16 <- f16
    "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f16" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F16
            }
        ), a)
    },
    // f32 <- f16
    "cvt" <f:".ftz"?> <s:".sat"?> ".f32" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F16
            }
        ), a)
    },
    // f64 <- f16
    "cvt" <s:".sat"?> ".f64" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F16
            }
        ), a)
    },
    // f16 <- f32
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f16" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F32
            }
        ), a)
    },
    // f32 <- f32
    "cvt" <r:RoundingModeFloat?> <f:".ftz"?> <s:".sat"?> ".f32" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F32
            }
        ), a)
    },
    // f64 <- f32
    "cvt" <s:".sat"?> ".f64" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F32
            }
        ), a)
    },
    // f16 <- f64
    "cvt" <r:RoundingModeFloat> <s:".sat"?> ".f16" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F16,
                src: ast::FloatType::F64
            }
        ), a)
    },
    // f32 <- f64
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f32" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                // Taken from the `.ftz` modifier. This used to read the
                // `.sat` binding, so `.ftz` was ignored and `.sat` set both
                // flags.
                flush_to_zero: f.is_some(),
                saturate: s.is_some(),
                dst: ast::FloatType::F32,
                src: ast::FloatType::F64
            }
        ), a)
    },
    // f64 <- f64
    "cvt" <r:RoundingModeFloat?> <s:".sat"?> ".f64" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: false,
                saturate: s.is_some(),
                dst: ast::FloatType::F64,
                src: ast::FloatType::F64
            }
        ), a)
    },
};
|
|
|
|
// Integer types accepted by `cvt`; unlike `IntType` this includes the 8-bit
// types.
CvtTypeInt: ast::IntType = {
    ".u8"  => ast::IntType::U8,
    ".u16" => ast::IntType::U16,
    ".u32" => ast::IntType::U32,
    ".u64" => ast::IntType::U64,
    ".s8"  => ast::IntType::S8,
    ".s16" => ast::IntType::S16,
    ".s32" => ast::IntType::S32,
    ".s64" => ast::IntType::S64,
};
|
|
|
|
// Float types accepted by the int/float forms of `cvt`.
CvtTypeFloat: ast::FloatType = {
    ".f16" => ast::FloatType::F16,
    ".f32" => ast::FloatType::F32,
    ".f64" => ast::FloatType::F64,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
|
|
// `shl<type> dst, src1, src2`.
InstShl: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "shl" <typ:ShlType> <args:Arg3> => ast::Instruction::Shl(typ, args)
};
|
|
|
|
// Types accepted by `shl`.
ShlType: ast::ShlType = {
    ".b16" => ast::ShlType::B16,
    ".b32" => ast::ShlType::B32,
    ".b64" => ast::ShlType::B64,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
|
|
// Warning: NVIDIA documentation is incorrect, you can specify scope only once
|
|
// `st{.qualifier}{.space}{.cache} type [dst], src`. Omitted modifiers default
// to the weak qualifier, the generic state space and the write-back operator.
InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "st" <qualifier:LdStQualifier?> <space:StStateSpace?> <cache:StCacheOperator?> <typ:LdStType> <src1:MemoryOperand> "," <src2:OperandOrVector> => {
        let details = ast::StData {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: space.unwrap_or(ast::StStateSpace::Generic),
            caching: cache.unwrap_or(ast::StCacheOperator::Writeback),
            typ
        };
        ast::Instruction::St(details, ast::Arg2St { src1, src2 })
    }
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors
|
|
// An operand wrapped in square brackets; yields the inner operand.
MemoryOperand: ast::Operand<&'input str> = {
    "[" <Operand> "]"
}
|
|
|
|
// State spaces that may be named on `st`.
StStateSpace: ast::StStateSpace = {
    ".global" => ast::StStateSpace::Global,
    ".local"  => ast::StStateSpace::Local,
    ".param"  => ast::StStateSpace::Param,
    ".shared" => ast::StStateSpace::Shared,
};
|
|
|
|
// Cache operators that may be named on `st`.
StCacheOperator: ast::StCacheOperator = {
    ".wb" => ast::StCacheOperator::Writeback,
    ".cg" => ast::StCacheOperator::L2Only,
    ".cs" => ast::StCacheOperator::Streaming,
    ".wt" => ast::StCacheOperator::Writethrough,
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
|
|
// `ret{.uni}`.
InstRet: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ret" <uni:".uni"?> => {
        let details = ast::RetData { uniform: uni.is_some() };
        ast::Instruction::Ret(details)
    }
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta
|
|
// `cvta`. Without `.to`, the named state space is stored in `to` and `from`
// is generic; with `.to`, the named state space is stored in `from` and `to`
// is generic.
InstCvta: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "cvta" <space:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
        let details = ast::CvtaDetails {
            to: space,
            from: ast::CvtaStateSpace::Generic,
            size
        };
        ast::Instruction::Cvta(details, args)
    },
    "cvta" ".to" <space:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
        let details = ast::CvtaDetails {
            to: ast::CvtaStateSpace::Generic,
            from: space,
            size
        };
        ast::Instruction::Cvta(details, args)
    }
}
|
|
|
|
// State spaces that may be named on `cvta`.
CvtaStateSpace: ast::CvtaStateSpace = {
    ".const"  => ast::CvtaStateSpace::Const,
    ".global" => ast::CvtaStateSpace::Global,
    ".local"  => ast::CvtaStateSpace::Local,
    ".shared" => ast::CvtaStateSpace::Shared,
}
|
|
|
|
// Size suffix of `cvta`.
CvtaSize: ast::CvtaSize = {
    ".u32" => ast::CvtaSize::U32,
    ".u64" => ast::CvtaSize::U64,
}
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call
|
|
// `call{.uni}` followed by the return list, function name and argument list
// as parsed by `ArgCall`.
InstCall: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "call" <uni:".uni"?> <call:ArgCall> => {
        let uniform = uni.is_some();
        let (ret_params, func, param_list) = call;
        ast::Instruction::Call(ast::CallInst { uniform, ret_params, func, param_list })
    }
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
|
|
// `abs` for signed integers and for floats. `.ftz` is accepted on every
// float type except `.f64`; integer and `.f64` forms always record
// `flush_to_zero: false`.
InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "abs" <t:SignedIntType> <a:Arg2> => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: false, typ: t }, a)
    },
    "abs" <f:".ftz"?> ".f32" <a:Arg2> => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F32 }, a)
    },
    "abs" ".f64" <a:Arg2> => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: false, typ: ast::ScalarType::F64 }, a)
    },
    "abs" <f:".ftz"?> ".f16" <a:Arg2> => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F16 }, a)
    },
    // Previously `todo!()`, which panicked the parser on valid input; built
    // the same way as the `.f16` form above.
    // NOTE(review): confirm that the passes consuming `AbsDetails` handle
    // `ScalarType::F16x2`.
    "abs" <f:".ftz"?> ".f16x2" <a:Arg2> => {
        ast::Instruction::Abs(ast::AbsDetails { flush_to_zero: f.is_some(), typ: ast::ScalarType::F16x2 }, a)
    },
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
|
|
// `mad<mode> dst, src1, src2, src3`; the mode suffix is shared with `mul`.
// NOTE(review): the `.hi.sat.s32` form is unimplemented — it takes no
// operands and its action panics through `todo!()`.
InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mad" <mode:InstMulMode> <args:Arg4> => ast::Instruction::Mad(mode, args),
    "mad" ".hi" ".sat" ".s32" => todo!()
};
|
|
|
|
// Signed 16-, 32- and 64-bit integer types, as `ast::ScalarType`.
SignedIntType: ast::ScalarType = {
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
};
|
|
|
|
// A source operand: a register, a register plus a numeric offset (parsed as
// `i32`), or an immediate (parsed as `u32`).
// NOTE(review): the Num token may carry a leading `-`, which the `u32` parse
// of an immediate will not accept.
Operand: ast::Operand<&'input str> = {
    <reg:ExtendedID> => ast::Operand::Reg(reg),
    <reg:ExtendedID> "+" <num:Num> => {
        let offset = num.parse::<i32>().unwrap_with(errors);
        ast::Operand::RegOffset(reg, offset)
    },
    // TODO: start parsing whole constants sub-language:
    // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
    <num:Num> => {
        let value = num.parse::<u32>().unwrap_with(errors);
        ast::Operand::Imm(value)
    }
};
|
|
|
|
// An argument of `call`: a register or an immediate (parsed as `u32`).
CallOperand: ast::CallOperand<&'input str> = {
    <reg:ExtendedID> => ast::CallOperand::Reg(reg),
    <num:Num> => {
        let value = num.parse::<u32>().unwrap_with(errors);
        ast::CallOperand::Imm(value)
    }
};
|
|
|
|
// Single-identifier argument list.
Arg1: ast::Arg1<ast::ParsedArgParams<'input>> = {
    <src:ExtendedID> => ast::Arg1 { src }
};
|
|
|
|
// `dst, src` argument list.
Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src:Operand> => ast::Arg2 { dst, src }
};
|
|
|
|
// Arguments of a `mov` that touches a vector member: member destination,
// member source, or both. When the destination is a member, the identifier
// part of the destination is passed a second time as the middle field.
Arg2MovMember: ast::Arg2MovMember<ast::ParsedArgParams<'input>> = {
    <dst:MemberOperand> "," <src:ExtendedID> => {
        let composite = dst.0;
        ast::Arg2MovMember::Dst(dst, composite, src)
    },
    <dst:ExtendedID> "," <src:MemberOperand> => ast::Arg2MovMember::Src(dst, src),
    <dst:MemberOperand> "," <src:MemberOperand> => {
        let composite = dst.0;
        ast::Arg2MovMember::Both(dst, composite, src)
    },
};
|
|
|
|
// `reg.member`, yielding the register name and the index computed by
// `vector_index`. The suffix arrives either as `.` followed by an identifier
// or as one DotID token; in the latter case the leading `.` is stripped. A
// failure of `vector_index` is propagated as a parse error.
MemberOperand: (&'input str, u8) = {
    <reg:ExtendedID> "." <member:ExtendedID> =>? {
        let index = vector_index(member)?;
        Ok((reg, index))
    },
    <reg:ExtendedID> <member:DotID> =>? {
        let index = vector_index(&member[1..])?;
        Ok((reg, index))
    }
};
|
|
|
|
// A braced list of exactly two or exactly four identifiers.
VectorExtract: Vec<&'input str> = {
    "{" <a:ExtendedID> "," <b:ExtendedID> "}" => vec![a, b],
    "{" <a:ExtendedID> "," <b:ExtendedID> "," <c:ExtendedID> "," <d:ExtendedID> "}" => {
        vec![a, b, c, d]
    },
};
|
|
|
|
// `dst, src1, src2` argument list.
Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => {
        ast::Arg3 { dst, src1, src2 }
    }
};
|
|
|
|
// `dst, src1, src2, src3` argument list.
Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
    <dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => {
        ast::Arg4 { dst, src1, src2, src3 }
    }
};
|
|
|
|
// Arguments of plain `setp`: `dst1{|dst2}, src1, src2`.
Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => {
        ast::Arg4Setp { dst1, dst2, src1, src2 }
    }
};
|
|
|
|
// TODO: pass src3 negation somewhere
|
|
// Arguments of combining `setp`: `dst1{|dst2}, src1, src2, {!}src3`. The
// optional `!` is accepted but not recorded (see the TODO above).
Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => {
        ast::Arg5 { dst1, dst2, src1, src2, src3 }
    }
};
|
|
|
|
// Operands of `call`, yielding (return parameters, function name, argument
// list). Accepted shapes: `(rets), func, (args)`, `func, (args)` and a bare
// `func`; missing lists become empty vectors.
ArgCall: (Vec<&'input str>, &'input str, Vec<ast::CallOperand<&'input str>>) = {
    "(" <rets:Comma<ExtendedID>> ")" "," <func:ExtendedID> "," "(" <args:Comma<CallOperand>> ")" => {
        (rets, func, args)
    },
    <func:ExtendedID> "," "(" <args:Comma<CallOperand>> ")" => {
        (Vec::new(), func, args)
    },
    <func:ExtendedID> => {
        (Vec::new(), func, Vec::<ast::CallOperand<_>>::new())
    },
};
|
|
|
|
// Second destination of `setp`, written as `|dst2`; yields the identifier.
OptionalDst: &'input str = {
    "|" <ExtendedID>
}
|
|
|
|
// Vector width prefix, as the number of elements.
VectorPrefix: u8 = {
    ".v2" => 2,
    ".v4" => 4
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
|
|
// `.file <number> "<string>"`, optionally followed by `, <number>, <number>`.
File = {
    ".file" Num String ("," Num "," Num)?
};
|
|
|
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
|
|
// `.section <.name> { ... }` containing any number of data lines.
Section = {
    ".section" DotID "{" SectionDwarfLines* "}"
};
|
|
|
|
// One line inside a `.section` block: a bit type followed by a
// comma-separated list of numbers, or a `.b32`/`.b64` label reference with
// an optional `+ <number>` offset.
SectionDwarfLines: () = {
    BitType Comma<Num>,
    ".b32" SectionLabel,
    ".b32" SectionLabel "+" Num,
    ".b64" SectionLabel,
    ".b64" SectionLabel "+" Num,
};
|
|
|
|
// A label referenced from a section line: a plain or dot-prefixed identifier.
SectionLabel = {
    ID,
    DotID
};
|
|
|
|
// Untyped bit-size tokens usable on section data lines.
BitType = {
    ".b8",
    ".b16",
    ".b32",
    ".b64"
};
|
|
|
|
// Comma-separated list of `T`. The list may be empty and a trailing comma is
// accepted.
Comma<T>: Vec<T> = {
    <head:(<T> ",")*> <tail:T?> => {
        let mut items = head;
        // `tail` is the optional last element, the one without a comma.
        items.extend(tail);
        items
    }
};
|