ZLUDA/ptx/src/ptx.lalrpop
2021-09-24 01:31:50 +02:00

2142 lines
68 KiB
Plaintext

use crate::ast;
use crate::ast::UnwrapWithVec;
use crate::{without_none, vector_index};
use lalrpop_util::ParseError;
use std::convert::TryInto;
// Grammar header. `errors` is the caller-supplied sink: the error-recovery
// productions (`!`) push into it, and it is also handed to the checked
// `ast::CvtDetails` constructors used by the `cvt` productions.
grammar<'err>(errors: &'err mut Vec<ParseError<usize, Token<'input>, ast::PtxError>>);
// User errors produced by fallible (`=>?`) actions are `ast::PtxError`.
extern {
type Error = ast::PtxError;
}
// Lexer definition. The block has three groups separated by `else`:
//   1. skipped input (whitespace, comments), numeric/string literals,
//      punctuation and dot-prefixed keywords,
//   2. bare words that are instruction names or target options,
//   3. the general `ID` / `DotID` patterns.
// NOTE(review): per LALRPOP's lexer documentation, entries in an earlier group
// take precedence over entries in later `else` groups - do not reorder groups.
match {
// Whitespace and both comment styles map to `{ }`, i.e. no token is produced.
r"\s+" => { },
r"//[^\n\r]*[\n\r]*" => { },
r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/" => { },
// Literals. The float tokens are a `0f`/`0d` prefix followed by 8/16 characters.
r"0[fF][0-9a-zA-Z]{8}" => F32NumToken,
r"0[dD][0-9a-zA-Z]{16}" => F64NumToken,
r"0[xX][0-9a-zA-Z]+U?" => HexNumToken,
r"[0-9]+U?" => DecimalNumToken,
r#""[^"]*""# => String,
r"[0-9]+\.[0-9]+" => VersionNumber,
// Punctuation.
"!",
"(", ")",
"+",
"-",
",",
".",
":",
";",
"@",
"[", "]",
"{", "}",
"<", ">",
"|",
"=",
// Dot-prefixed directives, types and modifiers.
".acq_rel",
".acquire",
".add",
".address_size",
".align",
".aligned",
".and",
".approx",
".b16",
".b32",
".b64",
".b8",
".ca",
".cas",
".cg",
".const",
".cs",
".cta",
".cv",
".dec",
".entry",
".eq",
".equ",
".exch",
".extern",
".f16",
".f16x2",
".f32",
".f64",
".file",
".ftz",
".full",
".func",
".ge",
".geu",
".gl",
".global",
".gpu",
".gt",
".gtu",
".hi",
".hs",
".inc",
".le",
".leu",
".lo",
".loc",
".local",
".ls",
".lt",
".ltu",
".lu",
".max",
".maxnreg",
".maxntid",
".minnctapersm",
".min",
".nan",
".NaN",
".nc",
".ne",
".neu",
".num",
".or",
".param",
".pragma",
".pred",
".reg",
".relaxed",
".release",
".reqntid",
".rm",
".rmi",
".rn",
".rni",
".rp",
".rpi",
".rz",
".rzi",
".s16",
".s32",
".s64",
".s8" ,
".sat",
".section",
".shared",
".sync",
".sys",
".target",
".to",
".u16",
".u32",
".u64",
".u8" ,
".uni",
".v2",
".v4",
".version",
".visible",
".volatile",
".wb",
".weak",
".wide",
".wt",
".xor",
} else {
// IF YOU ARE ADDING A NEW TOKEN HERE ALSO ADD IT BELOW TO ExtendedID
"abs",
"activemask",
"add",
"and",
"atom",
"bar",
"barrier",
"bfe",
"bfi",
"bra",
"brev",
"call",
"clz",
"cos",
"cvt",
"cvta",
"debug",
"div",
"ex2",
"fma",
"ld",
"lg2",
"mad",
"map_f64_to_f32",
"max",
"membar",
"min",
"mov",
"mul",
"neg",
"not",
"or",
"popc",
"prmt",
"rcp",
"rem",
"ret",
"rsqrt",
"selp",
"setp",
"shl",
"shr",
"sin",
r"sm_[0-9]+" => ShaderModel,
"sqrt",
"st",
"sub",
"texmode_independent",
"texmode_unified",
"xor",
} else {
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+" => ID,
r"\.[a-zA-Z][a-zA-Z0-9_$]*" => DotID,
}
// An identifier in the wider sense: either a plain `ID` or any of the bare-word
// tokens from the second `match` group (instruction names, `ShaderModel`,
// target options). This list has to mirror that group; a bare word missing
// here cannot be used where an `ExtendedID` is expected.
ExtendedID : &'input str = {
"abs",
"activemask",
"add",
"and",
"atom",
"bar",
"barrier",
"bfe",
"bfi",
"bra",
"brev",
"call",
"clz",
"cos",
"cvt",
"cvta",
"debug",
"div",
"ex2",
"fma",
"ld",
"lg2",
"mad",
"map_f64_to_f32",
"max",
"membar",
"min",
"mov",
"mul",
"neg",
"not",
"or",
"popc",
"prmt",
"rcp",
"rem",
"ret",
"rsqrt",
"selp",
"setp",
"shl",
"shr",
"sin",
ShaderModel,
"sqrt",
"st",
"sub",
"texmode_independent",
"texmode_unified",
"xor",
ID
}
// Splits an integer token into (digit text, radix, has `U` suffix).
// Hex tokens lose their two-character prefix; decimal tokens starting with
// `0` are reported with radix 8, all others with radix 10.
NumToken: (&'input str, u32, bool) = {
    <s:HexNumToken> => {
        let digits = &s[2..];
        match digits.strip_suffix('U') {
            Some(unsuffixed) => (unsuffixed, 16, true),
            None => (digits, 16, false),
        }
    },
    <s:DecimalNumToken> => {
        let radix = if s.starts_with('0') { 8 } else { 10 };
        match s.strip_suffix('U') {
            Some(unsuffixed) => (unsuffixed, radix, true),
            None => (s, radix, false),
        }
    }
}
// Converts an `F32NumToken` into an f32: the text after the two-character
// prefix is read as a hexadecimal u32 and reinterpreted as the float's bits.
// A token whose payload is not valid hexadecimal yields a user parse error.
F32Num: f32 = {
    <s:F32NumToken> =>? {
        u32::from_str_radix(&s[2..], 16)
            // `from_bits` is the safe equivalent of transmuting u32 -> f32.
            .map(f32::from_bits)
            .map_err(|err| ParseError::User { error: ast::PtxError::from(err) })
    }
}
// Converts an `F64NumToken` into an f64: the text after the two-character
// prefix is read as a hexadecimal u64 and reinterpreted as the float's bits.
// A token whose payload is not valid hexadecimal yields a user parse error.
F64Num: f64 = {
    <s:F64NumToken> =>? {
        u64::from_str_radix(&s[2..], 16)
            // `from_bits` is the safe equivalent of transmuting u64 -> f64.
            .map(f64::from_bits)
            .map_err(|err| ParseError::User { error: ast::PtxError::from(err) })
    }
}
// Integer literal that must fit in a u8; the `U` suffix flag is ignored.
U8Num: u8 = {
    <token:NumToken> =>? {
        let (digits, radix, _) = token;
        u8::from_str_radix(digits, radix)
            .map_err(|err| ParseError::User { error: ast::PtxError::from(err) })
    }
}
// Integer literal that must fit in a u16; the `U` suffix flag is ignored.
U16Num: u16 = {
    <token:NumToken> =>? {
        let (digits, radix, _) = token;
        u16::from_str_radix(digits, radix)
            .map_err(|err| ParseError::User { error: ast::PtxError::from(err) })
    }
}
// Integer literal that must fit in a u32; the `U` suffix flag is ignored.
U32Num: u32 = {
    <token:NumToken> =>? {
        let (digits, radix, _) = token;
        u32::from_str_radix(digits, radix)
            .map_err(|err| ParseError::User { error: ast::PtxError::from(err) })
    }
}
// Signed 32-bit literal with an optional leading `-`.
// The sign is parsed together with the digits rather than applied afterwards:
// the magnitude of i32::MIN does not fit in an i32, so parsing the digits
// first and negating the result would reject the most negative value.
S32Num: i32 = {
    <sign:"-"?> <x:NumToken> =>? {
        let (text, radix, _) = x;
        let parsed = if sign.is_some() {
            // `from_str_radix` accepts a leading '-' for signed integer types.
            i32::from_str_radix(&format!("-{}", text), radix)
        } else {
            i32::from_str_radix(text, radix)
        };
        parsed.map_err(|err| ParseError::User { error: ast::PtxError::from(err) })
    }
}
// Public entry point: a version, a target line (parsed and discarded), then
// any number of directives. Directives that produced `None` are dropped.
pub Module: ast::Module<'input> = {
    <version:Version> Target <parsed:Directive*> => {
        let directives = without_none(parsed);
        ast::Module { version, directives }
    }
};
// `.version MAJOR.MINOR` parsed into a (major, minor) pair of u8 values.
// A component that does not fit in a u8 is reported as a user parse error.
Version: (u8, u8) = {
    ".version" <v:VersionNumber> =>? {
        // The VersionNumber token pattern always contains a '.'.
        let dot = v.find('.').unwrap();
        let (major_text, rest) = v.split_at(dot);
        let minor_text = &rest[1..];
        let major = major_text.parse::<u8>().map_err(|e| ParseError::from(ast::PtxError::from(e)))?;
        let minor = minor_text.parse::<u8>().map_err(|e| ParseError::from(ast::PtxError::from(e)))?;
        Ok((major, minor))
    }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
// `.target` followed by a comma-separated list of specifiers. No value is
// produced; the `Module` production matches it without capturing it.
Target = {
".target" Comma<TargetSpecifier>
};
// One entry of a `.target` list: a shader model (`sm_NN`) or one of the
// listed option words. Recognized only; no value is produced.
TargetSpecifier = {
ShaderModel,
"texmode_unified",
"texmode_independent",
"debug",
"map_f64_to_f32"
};
// A single module-level directive. Functions and module variables become
// `Some(..)`; address size, file and section directives are accepted but
// yield `None`. On a syntax error the recovered error is recorded in `errors`
// and `None` is returned.
Directive: Option<ast::Directive<'input, ast::ParsedArgParams<'input>>> = {
    AddressSize => None,
    <method:Function> => Some(ast::Directive::Method(method.0, method.1)),
    File => None,
    Section => None,
    <global:ModuleVariable> ";" => Some(ast::Directive::Variable(global.0, global.1)),
    ! => {
        let recovery = <>;
        errors.push(recovery.error);
        None
    }
};
// `.address_size N`; the number is parsed as a u8 and the result is discarded.
AddressSize = {
".address_size" U8Num
};
// A function or kernel: linking directives, the declaration, optional tuning
// directives and then either a body or a terminating `;`.
// Returns the linkage alongside the assembled `ast::Function`.
Function: (ast::LinkingDirective, ast::Function<'input, &'input str, ast::Statement<ast::ParsedArgParams<'input>>>) = {
    <linking:LinkingDirectives>
    <declaration:MethodDeclaration>
    <tuning:TuningDirective*>
    <body:FunctionBody> => {
        let function = ast::Function { func_directive: declaration, tuning, body };
        (linking, function)
    }
};
// A single linking keyword mapped to its `ast::LinkingDirective` flag.
LinkingDirective: ast::LinkingDirective = {
".extern" => ast::LinkingDirective::EXTERN,
".visible" => ast::LinkingDirective::VISIBLE,
".weak" => ast::LinkingDirective::WEAK,
};
// Performance-tuning directives attached to a function.
// `.maxntid` and `.reqntid` take one to three comma-separated values;
// dimensions that are not written default to 1.
TuningDirective: ast::TuningDirective = {
".maxnreg" <ncta:U32Num> => ast::TuningDirective::MaxNReg(ncta),
".maxntid" <nx:U32Num> => ast::TuningDirective::MaxNtid(nx, 1, 1),
".maxntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, 1),
".maxntid" <nx:U32Num> "," <ny:U32Num> "," <nz:U32Num> => ast::TuningDirective::MaxNtid(nx, ny, nz),
".reqntid" <nx:U32Num> => ast::TuningDirective::ReqNtid(nx, 1, 1),
".reqntid" <nx:U32Num> "," <ny:U32Num> => ast::TuningDirective::ReqNtid(nx, ny, 1),
".reqntid" <nx:U32Num> "," <ny:U32Num> "," <nz:U32Num> => ast::TuningDirective::ReqNtid(nx, ny, nz),
".minnctapersm" <ncta:U32Num> => ast::TuningDirective::MinNCtaPerSm(ncta),
};
// Zero or more linking keywords OR-ed into a single flag set; an empty list
// gives `ast::LinkingDirective::NONE`.
LinkingDirectives: ast::LinkingDirective = {
    <flags:LinkingDirective*> => {
        let mut combined = ast::LinkingDirective::NONE;
        for flag in flags {
            combined = combined | flag;
        }
        combined
    }
}
// Declaration header of a kernel (`.entry`) or function (`.func`).
// Kernels never have return arguments; for functions the parenthesized return
// list is optional. `shared_mem` is always left as `None` here.
MethodDeclaration: ast::MethodDeclaration<'input, &'input str> = {
    ".entry" <kernel_name:ExtendedID> <input_arguments:KernelArguments> => {
        ast::MethodDeclaration {
            return_arguments: Vec::new(),
            name: ast::MethodName::Kernel(kernel_name),
            input_arguments,
            shared_mem: None
        }
    },
    ".func" <returns:FnArguments?> <func_name:ExtendedID> <input_arguments:FnArguments> => {
        ast::MethodDeclaration {
            return_arguments: returns.unwrap_or_default(),
            name: ast::MethodName::Func(func_name),
            input_arguments,
            shared_mem: None
        }
    }
};
// Parenthesized, comma-separated kernel parameter list.
KernelArguments: Vec<ast::Variable<&'input str>> = {
    "(" <Comma<KernelInput>> ")"
};
// Parenthesized, comma-separated function argument list.
FnArguments: Vec<ast::Variable<&'input str>> = {
    "(" <Comma<FnInput>> ")"
};
// One kernel parameter: always a `.param` declaration with no initializer.
KernelInput: ast::Variable<&'input str> = {
    <param:ParamDeclaration> => ast::Variable {
        align: param.0,
        v_type: param.1,
        state_space: ast::StateSpace::Param,
        name: param.2,
        array_init: Vec::new()
    }
}
// One function argument: either a `.reg` variable or a `.param` declaration.
// Neither form carries an initializer.
FnInput: ast::Variable<&'input str> = {
    <reg:RegVariable> => ast::Variable {
        align: reg.0,
        v_type: reg.1,
        state_space: ast::StateSpace::Reg,
        name: reg.2,
        array_init: Vec::new()
    },
    <param:ParamDeclaration> => ast::Variable {
        align: param.0,
        v_type: param.1,
        state_space: ast::StateSpace::Param,
        name: param.2,
        array_init: Vec::new()
    }
}
// Either a braced statement list (`Some`, with `None` statements filtered
// out) or a bare `;` for a declaration without a body (`None`).
FunctionBody: Option<Vec<ast::Statement<ast::ParsedArgParams<'input>>>> = {
    "{" <statements:Statement*> "}" => Some(without_none(statements)),
    ";" => None
};
// State-space keyword mapped to the matching `ast::StateSpace` variant.
StateSpaceSpecifier: ast::StateSpace = {
".reg" => ast::StateSpace::Reg,
".const" => ast::StateSpace::Const,
".global" => ast::StateSpace::Global,
".local" => ast::StateSpace::Local,
".shared" => ast::StateSpace::Shared,
".param" => ast::StateSpace::Param, // used to prepare function call
};
// Every scalar type keyword, including `.pred` and `.f16x2`, mapped to
// `ast::ScalarType`.
#[inline]
ScalarType: ast::ScalarType = {
".f16" => ast::ScalarType::F16,
".f16x2" => ast::ScalarType::F16x2,
".pred" => ast::ScalarType::Pred,
".b8" => ast::ScalarType::B8,
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u8" => ast::ScalarType::U8,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s8" => ast::ScalarType::S8,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
};
// One statement inside a function body or nested block. Labels, variable
// declarations, (optionally predicated) instructions and nested blocks yield
// `Some(..)`; debug directives and pragmas are accepted and dropped. On a
// syntax error, input up to the next `;` is consumed, the error is recorded
// in `errors`, and `None` is returned.
Statement: Option<ast::Statement<ast::ParsedArgParams<'input>>> = {
    <label:Label> => Some(ast::Statement::Label(label)),
    DebugDirective => None,
    <declaration:MultiVariable> ";" => Some(ast::Statement::Variable(declaration)),
    <predicate:PredAt?> <instruction:Instruction> ";" => {
        Some(ast::Statement::Instruction(predicate, instruction))
    },
    PragmaStatement => None,
    "{" <nested:Statement*> "}" => Some(ast::Statement::Block(without_none(nested))),
    ! ";" => {
        let (recovery, _) = (<>);
        errors.push(recovery.error);
        None
    }
};
// `.pragma "<string>";` - recognized and discarded.
PragmaStatement: () = {
".pragma" String ";"
}
// Debugging directives; currently only `.loc`. Produces no value.
DebugDirective: () = {
DebugLocation
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc
// `.loc` followed by three u32 values; the values are parsed and discarded.
DebugLocation = {
".loc" U32Num U32Num U32Num
};
// A label definition: an identifier followed by `:`. Yields the identifier.
Label: &'input str = {
    <ExtendedID> ":"
};
// `.align N`; yields N.
Align: u32 = {
    ".align" <U32Num>
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameterized-variable-names
// A variable declaration with an optional `<N>` count suffix.
MultiVariable: ast::MultiVariable<&'input str> = {
    <var:Variable> <count:VariableParam?> => ast::MultiVariable { var, count }
}
// The `<N>` suffix of a parameterized variable name; yields N.
VariableParam: u32 = {
    "<" <U32Num> ">"
}
// A variable declared inside a function body, in one of the `.reg`,
// `.local`, `.param` or `.shared` state spaces. Only `.param` variables take
// their initializer from the parsed declaration; `.reg` variables never have
// one.
Variable: ast::Variable<&'input str> = {
    <reg:RegVariable> => ast::Variable {
        align: reg.0,
        v_type: reg.1,
        state_space: ast::StateSpace::Reg,
        name: reg.2,
        array_init: Vec::new()
    },
    LocalVariable,
    <param:ParamVariable> => ast::Variable {
        align: param.0,
        v_type: param.2,
        state_space: ast::StateSpace::Param,
        name: param.3,
        array_init: param.1
    },
    SharedVariable,
};
// `.reg` declaration, scalar or vector. Yields (alignment, type, name).
RegVariable: (Option<u32>, ast::Type, &'input str) = {
    ".reg" <scalar:VariableScalar<ScalarType>> => {
        let (align, elem, name) = scalar;
        (align, ast::Type::Scalar(elem), name)
    },
    ".reg" <vector:VariableVector<SizedScalarType>> => {
        let (align, width, elem, name) = vector;
        (align, ast::Type::Vector(elem, width), name)
    }
}
// `.local` declaration: scalar, vector or array. The array form rejects a
// declaration without dimensions (`ArrayOrPointer::Pointer`) with
// `PtxError::ZeroDimensionArray`.
LocalVariable: ast::Variable<&'input str> = {
    ".local" <scalar:VariableScalar<SizedScalarType>> => {
        let (align, elem, name) = scalar;
        ast::Variable {
            align,
            v_type: ast::Type::Scalar(elem),
            state_space: ast::StateSpace::Local,
            name,
            array_init: Vec::new()
        }
    },
    ".local" <vector:VariableVector<SizedScalarType>> => {
        let (align, width, elem, name) = vector;
        ast::Variable {
            align,
            v_type: ast::Type::Vector(elem, width),
            state_space: ast::StateSpace::Local,
            name,
            array_init: Vec::new()
        }
    },
    ".local" <array:VariableArrayOrPointer<SizedScalarType>> =>? {
        let (align, elem, name, shape) = array;
        match shape {
            ast::ArrayOrPointer::Pointer => {
                Err(ParseError::User { error: ast::PtxError::ZeroDimensionArray })
            }
            ast::ArrayOrPointer::Array { dimensions, init } => Ok(ast::Variable {
                align,
                v_type: ast::Type::Array(elem, dimensions),
                state_space: ast::StateSpace::Local,
                name,
                array_init: init
            }),
        }
    }
}
// `.shared` declaration: scalar, vector or array. The array form rejects a
// declaration without dimensions (`ArrayOrPointer::Pointer`) with
// `PtxError::ZeroDimensionArray`.
SharedVariable: ast::Variable<&'input str> = {
    ".shared" <scalar:VariableScalar<SizedScalarType>> => {
        let (align, elem, name) = scalar;
        ast::Variable {
            align,
            v_type: ast::Type::Scalar(elem),
            state_space: ast::StateSpace::Shared,
            name,
            array_init: Vec::new()
        }
    },
    ".shared" <vector:VariableVector<SizedScalarType>> => {
        let (align, width, elem, name) = vector;
        ast::Variable {
            align,
            v_type: ast::Type::Vector(elem, width),
            state_space: ast::StateSpace::Shared,
            name,
            array_init: Vec::new()
        }
    },
    ".shared" <array:VariableArrayOrPointer<SizedScalarType>> =>? {
        let (align, elem, name, shape) = array;
        match shape {
            ast::ArrayOrPointer::Pointer => {
                Err(ParseError::User { error: ast::PtxError::ZeroDimensionArray })
            }
            ast::ArrayOrPointer::Array { dimensions, init } => Ok(ast::Variable {
                align,
                v_type: ast::Type::Array(elem, dimensions),
                state_space: ast::StateSpace::Shared,
                name,
                array_init: init
            }),
        }
    }
}
// Module-scope variable with its linkage. Scalars and vectors come from
// `GlobalVariableDefinitionNoArray`. In the array form, a declaration without
// dimensions is accepted only when the linkage contains EXTERN (it becomes an
// array type with an empty dimension list); otherwise
// `PtxError::NonExternPointer` is raised.
ModuleVariable: (ast::LinkingDirective, ast::Variable<&'input str>) = {
    <linking:LinkingDirectives> <state_space:VariableStateSpace> <def:GlobalVariableDefinitionNoArray> => {
        let (align, v_type, name, array_init) = def;
        let variable = ast::Variable { align, v_type, state_space, name, array_init };
        (linking, variable)
    },
    <linking:LinkingDirectives> <state_space:VariableStateSpace> <array:VariableArrayOrPointer<SizedScalarType>> =>? {
        let (align, elem, name, shape) = array;
        let (dimensions, array_init) = match shape {
            ast::ArrayOrPointer::Array { dimensions, init } => (dimensions, init),
            ast::ArrayOrPointer::Pointer if linking.contains(ast::LinkingDirective::EXTERN) => {
                (Vec::new(), Vec::new())
            }
            ast::ArrayOrPointer::Pointer => {
                return Err(ParseError::User { error: ast::PtxError::NonExternPointer });
            }
        };
        let v_type = ast::Type::Array(elem, dimensions);
        Ok((linking, ast::Variable { align, v_type, state_space, name, array_init }))
    }
}
// State spaces accepted for module-scope variables (see `ModuleVariable`).
VariableStateSpace: ast::StateSpace = {
".const" => ast::StateSpace::Const,
".global" => ast::StateSpace::Global,
".shared" => ast::StateSpace::Shared,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
// `.param` declaration. Yields (alignment, initializer bytes, type, name).
// In the array form, a declaration without dimensions
// (`ArrayOrPointer::Pointer`) is treated as a scalar of the element type with
// an empty initializer.
ParamVariable: (Option<u32>, Vec<u8>, ast::Type, &'input str) = {
    ".param" <scalar:VariableScalar<LdStScalarType>> => {
        let (align, elem, name) = scalar;
        (align, Vec::new(), ast::Type::Scalar(elem), name)
    },
    ".param" <array:VariableArrayOrPointer<SizedScalarType>> => {
        let (align, elem, name, shape) = array;
        match shape {
            ast::ArrayOrPointer::Array { dimensions, init } => {
                (align, init, ast::Type::Array(elem, dimensions), name)
            }
            ast::ArrayOrPointer::Pointer => {
                (align, Vec::new(), ast::Type::Scalar(elem), name)
            }
        }
    }
}
// A `.param` declaration used as a kernel/function argument. An initializer
// is not allowed here: a non-empty one raises `PtxError::ArrayInitalizer`.
// Yields (alignment, type, name).
ParamDeclaration: (Option<u32>, ast::Type, &'input str) = {
    <param:ParamVariable> =>? {
        let (align, initializer, v_type, name) = param;
        if initializer.is_empty() {
            Ok((align, v_type, name))
        } else {
            Err(ParseError::User { error: ast::PtxError::ArrayInitalizer })
        }
    }
}
// Scalar or vector module-scope definition (arrays are handled separately in
// `ModuleVariable`). Yields (alignment, type, name, empty initializer).
GlobalVariableDefinitionNoArray: (Option<u32>, ast::Type, &'input str, Vec<u8>) = {
    <scalar:VariableScalar<SizedScalarType>> => {
        let (align, elem, name) = scalar;
        (align, ast::Type::Scalar(elem), name, Vec::new())
    },
    <vector:VariableVector<SizedScalarType>> => {
        let (align, width, elem, name) = vector;
        (align, ast::Type::Vector(elem, width), name, Vec::new())
    },
}
// Scalar type keywords excluding `.pred` (compare with `ScalarType`).
#[inline]
SizedScalarType: ast::ScalarType = {
".b8" => ast::ScalarType::B8,
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u8" => ast::ScalarType::U8,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s8" => ast::ScalarType::S8,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f16" => ast::ScalarType::F16,
".f16x2" => ast::ScalarType::F16x2,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
}
// Scalar type keywords accepted by `ld`/`st` and scalar `.param` variables:
// the `SizedScalarType` set without `.f16x2`.
#[inline]
LdStScalarType: ast::ScalarType = {
".b8" => ast::ScalarType::B8,
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u8" => ast::ScalarType::U8,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s8" => ast::ScalarType::S8,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f16" => ast::ScalarType::F16,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
}
// Dispatch over every supported instruction; each alternative is a separate
// `Inst*` nonterminal that yields an `ast::Instruction`.
Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
InstLd,
InstMov,
InstMul,
InstAdd,
InstSetp,
InstNot,
InstBra,
InstCvt,
InstShl,
InstShr,
InstSt,
InstRet,
InstCvta,
InstCall,
InstAbs,
InstMad,
InstFma,
InstOr,
InstAnd,
InstSub,
InstMin,
InstMax,
InstRcp,
InstSelp,
InstBar,
InstAtom,
InstAtomCas,
InstDiv,
InstSqrt,
InstRsqrt,
InstNeg,
InstSin,
InstCos,
InstLg2,
InstEx2,
InstClz,
InstBrev,
InstPopc,
InstXor,
InstRem,
InstBfe,
InstBfi,
InstPrmt,
InstActivemask,
InstMembar,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
// `ld` in three shapes:
//   * optional qualifier, optional non-global state space, optional cache op
//     (a missing state space means Generic),
//   * explicit `.global` with optional qualifier and cache op,
//   * `.global` ... `.nc` (non-coherent), with the restricted cache-op set and
//     a fixed Weak qualifier.
// Missing qualifier / cache op default to Weak / Cached.
InstLd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ld" <qualifier:LdStQualifier?> <space:LdNonGlobalStateSpace?> <cache:LdCacheOperator?> <typ:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
        let details = ast::LdDetails {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: space.unwrap_or(ast::StateSpace::Generic),
            caching: cache.unwrap_or(ast::LdCacheOperator::Cached),
            typ,
            non_coherent: false
        };
        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
    },
    "ld" <qualifier:LdStQualifier?> ".global" <cache:LdCacheOperator?> <typ:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
        let details = ast::LdDetails {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: ast::StateSpace::Global,
            caching: cache.unwrap_or(ast::LdCacheOperator::Cached),
            typ,
            non_coherent: false
        };
        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
    },
    "ld" ".global" <cache:LdNcCacheOperator?> ".nc" <typ:LdStType> <dst:DstOperandVec> "," <src:MemoryOperand> => {
        let details = ast::LdDetails {
            qualifier: ast::LdStQualifier::Weak,
            state_space: ast::StateSpace::Global,
            caching: cache.unwrap_or(ast::LdCacheOperator::Cached),
            typ,
            non_coherent: true
        };
        ast::Instruction::Ld(details, ast::Arg2Ld { dst, src })
    }
};
// Operand type of `ld`/`st`: a scalar, optionally preceded by a vector prefix.
LdStType: ast::Type = {
    <width:VectorPrefix> <elem:LdStScalarType> => ast::Type::Vector(elem, width),
    <elem:LdStScalarType> => ast::Type::Scalar(elem),
}
// Memory-ordering qualifier shared by `ld` and `st`. `.relaxed` and
// `.acquire` must be followed by a scope.
LdStQualifier: ast::LdStQualifier = {
    ".weak" => ast::LdStQualifier::Weak,
    ".volatile" => ast::LdStQualifier::Volatile,
    ".relaxed" <scope:MemScope> => ast::LdStQualifier::Relaxed(scope),
    ".acquire" <scope:MemScope> => ast::LdStQualifier::Acquire(scope),
};
// Memory scope keyword mapped to `ast::MemScope`.
MemScope: ast::MemScope = {
".cta" => ast::MemScope::Cta,
".gpu" => ast::MemScope::Gpu,
".sys" => ast::MemScope::Sys
};
// Level keyword of `membar`. Same result type as `MemScope`, but the
// GPU-wide level is spelled `.gl` here instead of `.gpu`.
MembarLevel: ast::MemScope = {
".cta" => ast::MemScope::Cta,
".gl" => ast::MemScope::Gpu,
".sys" => ast::MemScope::Sys
};
// State spaces for the first `ld` production; `.global` is deliberately
// absent because `InstLd` matches it with dedicated productions.
LdNonGlobalStateSpace: ast::StateSpace = {
".const" => ast::StateSpace::Const,
".local" => ast::StateSpace::Local,
".param" => ast::StateSpace::Param,
".shared" => ast::StateSpace::Shared,
};
// Cache operators accepted by ordinary (coherent) `ld`.
LdCacheOperator: ast::LdCacheOperator = {
".ca" => ast::LdCacheOperator::Cached,
".cg" => ast::LdCacheOperator::L2Only,
".cs" => ast::LdCacheOperator::Streaming,
".lu" => ast::LdCacheOperator::LastUse,
".cv" => ast::LdCacheOperator::Uncached,
};
// Cache operators accepted by `ld.global.nc`: the `LdCacheOperator` set
// without `.lu` and `.cv`.
LdNcCacheOperator: ast::LdCacheOperator = {
".ca" => ast::LdCacheOperator::Cached,
".cg" => ast::LdCacheOperator::L2Only,
".cs" => ast::LdCacheOperator::Streaming,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
// `mov` with an optional vector prefix: with the prefix the operand type is a
// vector of the scalar type, without it the scalar type itself.
InstMov: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mov" <width:VectorPrefix?> <elem:MovScalarType> <dst:DstOperandVec> "," <src:SrcOperandVec> => {
        let mov_type = if let Some(lanes) = width {
            ast::Type::Vector(elem, lanes)
        } else {
            ast::Type::Scalar(elem)
        };
        ast::Instruction::Mov(ast::MovDetails::new(mov_type), ast::Arg2Mov { dst, src })
    }
}
// Scalar types accepted by `mov`: 16/32/64-bit b/u/s types, `.f32`, `.f64`
// and `.pred`.
#[inline]
MovScalarType: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
".pred" => ast::ScalarType::Pred
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
// `mul` with its type/mode details followed by three operands.
InstMul: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mul" <details:MulDetails> <operands:Arg3> => ast::Instruction::Mul(details, operands)
};
// Mode and type of a `mul`: integer forms need a `.hi`/`.lo`/`.wide` control
// followed by an unsigned or signed type; the float form is `ArithFloat`.
MulDetails: ast::MulDetails = {
    <control:MulIntControl> <typ:UIntType> => {
        ast::MulDetails::Unsigned(ast::MulUInt { typ, control })
    },
    <control:MulIntControl> <typ:SIntType> => {
        ast::MulDetails::Signed(ast::MulSInt { typ, control })
    },
    <details:ArithFloat> => ast::MulDetails::Float(details)
};
// Integer multiplication control keyword mapped to `ast::MulIntControl`.
MulIntControl: ast::MulIntControl = {
".hi" => ast::MulIntControl::High,
".lo" => ast::MulIntControl::Low,
".wide" => ast::MulIntControl::Wide
};
// Rounding modifiers without the `i` suffix (`.rn`, `.rz`, `.rm`, `.rp`).
#[inline]
RoundingModeFloat : ast::RoundingMode = {
".rn" => ast::RoundingMode::NearestEven,
".rz" => ast::RoundingMode::Zero,
".rm" => ast::RoundingMode::NegativeInf,
".rp" => ast::RoundingMode::PositiveInf,
};
// Rounding modifiers with the `i` suffix (`.rni`, `.rzi`, `.rmi`, `.rpi`);
// they map to the same `ast::RoundingMode` variants as `RoundingModeFloat`.
RoundingModeInt : ast::RoundingMode = {
".rni" => ast::RoundingMode::NearestEven,
".rzi" => ast::RoundingMode::Zero,
".rmi" => ast::RoundingMode::NegativeInf,
".rpi" => ast::RoundingMode::PositiveInf,
};
// 16/32/64-bit unsigned and signed integer types.
IntType : ast::ScalarType = {
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
};
// 32/64-bit unsigned and signed integer types only.
IntType3264: ast::ScalarType = {
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
}
// 16/32/64-bit unsigned integer types.
UIntType: ast::ScalarType = {
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
};
// 16/32/64-bit signed integer types.
SIntType: ast::ScalarType = {
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
};
// Floating-point types, including the packed `.f16x2`.
FloatType: ast::ScalarType = {
".f16" => ast::ScalarType::F16,
".f16x2" => ast::ScalarType::F16x2,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
// `add` with its arithmetic details followed by three operands.
InstAdd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "add" <details:ArithDetails> <operands:Arg3> => ast::Instruction::Add(details, operands)
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
// TODO: support f16 setp
// Plain `setp` (compare only) or `setp` combined with a boolean post-operation.
InstSetp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "setp" <mode:SetpMode> <operands:Arg4Setp> => ast::Instruction::Setp(mode, operands),
    "setp" <mode:SetpBoolMode> <operands:Arg5Setp> => ast::Instruction::SetpBool(mode, operands),
};
// Comparison operator and operand type of a plain `setp`. Only the `.f32`
// form can carry `.ftz`, so `flush_to_zero` is `Some(..)` there and `None`
// for every other type.
SetpMode: ast::SetpData = {
    <cmp_op:SetpCompareOp> <typ:SetpTypeNoF32> => ast::SetpData {
        typ,
        flush_to_zero: None,
        cmp_op,
    },
    <cmp_op:SetpCompareOp> <flush:".ftz"?> ".f32" => ast::SetpData {
        typ: ast::ScalarType::F32,
        flush_to_zero: Some(flush.is_some()),
        cmp_op,
    }
};
// Like `SetpMode`, with a boolean post-operation between the comparison
// operator and the type. Only the `.f32` form can carry `.ftz`.
SetpBoolMode: ast::SetpBoolData = {
    <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <typ:SetpTypeNoF32> => ast::SetpBoolData {
        typ,
        flush_to_zero: None,
        cmp_op,
        bool_op,
    },
    <cmp_op:SetpCompareOp> <bool_op:SetpBoolPostOp> <flush:".ftz"?> ".f32" => ast::SetpBoolData {
        typ: ast::ScalarType::F32,
        flush_to_zero: Some(flush.is_some()),
        cmp_op,
        bool_op,
    }
};
// Comparison operator keyword mapped to `ast::SetpCompareOp`.
// Note that `.lo`/`.ls`/`.hi`/`.hs` map to the same variants as
// `.lt`/`.le`/`.gt`/`.ge`.
SetpCompareOp: ast::SetpCompareOp = {
".eq" => ast::SetpCompareOp::Eq,
".ne" => ast::SetpCompareOp::NotEq,
".lt" => ast::SetpCompareOp::Less,
".le" => ast::SetpCompareOp::LessOrEq,
".gt" => ast::SetpCompareOp::Greater,
".ge" => ast::SetpCompareOp::GreaterOrEq,
".lo" => ast::SetpCompareOp::Less,
".ls" => ast::SetpCompareOp::LessOrEq,
".hi" => ast::SetpCompareOp::Greater,
".hs" => ast::SetpCompareOp::GreaterOrEq,
".equ" => ast::SetpCompareOp::NanEq,
".neu" => ast::SetpCompareOp::NanNotEq,
".ltu" => ast::SetpCompareOp::NanLess,
".leu" => ast::SetpCompareOp::NanLessOrEq,
".gtu" => ast::SetpCompareOp::NanGreater,
".geu" => ast::SetpCompareOp::NanGreaterOrEq,
".num" => ast::SetpCompareOp::IsNotNan,
".nan" => ast::SetpCompareOp::IsAnyNan,
};
// Boolean post-operation of `setp` mapped to `ast::SetpBoolPostOp`.
SetpBoolPostOp: ast::SetpBoolPostOp = {
".and" => ast::SetpBoolPostOp::And,
".or" => ast::SetpBoolPostOp::Or,
".xor" => ast::SetpBoolPostOp::Xor,
};
// Operand types of `setp` other than `.f32`, which the `Setp*Mode`
// productions match separately so that it can take `.ftz`.
SetpTypeNoF32: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
// `not` with a boolean/bit type followed by two operands.
InstNot: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "not" <typ:BooleanType> <operands:Arg2> => ast::Instruction::Not(typ, operands)
};
// Types accepted by `not`: `.pred` and the 16/32/64-bit untyped bit types.
BooleanType: ast::ScalarType = {
".pred" => ast::ScalarType::Pred,
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
// Instruction guard: `@name` or the negated form `@!name`.
PredAt: ast::PredAt<&'input str> = {
    "@" <label:ExtendedID> => ast::PredAt { not: false, label },
    "@" "!" <label:ExtendedID> => ast::PredAt { not: true, label }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
// `bra` with an optional `.uni` modifier and a single operand.
InstBra: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bra" <uni:".uni"?> <target:Arg1> => {
        let data = ast::BraData { uniform: uni.is_some() };
        ast::Instruction::Bra(data, target)
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
// `cvt` in all supported shapes. Conversions that involve an integer type go
// through the `*_checked` constructors of `ast::CvtDetails`, which receive
// `errors`. Float-to-float conversions are spelled out per (dst, src) pair
// because the set of permitted modifiers differs for each pair;
// `flush_to_zero` is `Some(..)` only in productions that accept `.ftz`.
// In every production: `r` = rounding modifier, `f` = `.ftz`, `s` = `.sat`.
InstCvt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    // integer <- integer
    "cvt" <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeInt> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_int_checked(
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // float <- integer
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeFloat> <src_t:CvtTypeInt> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_float_from_int_checked(
                r,
                f.is_some(),
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // integer <- float
    "cvt" <r:RoundingModeInt> <f:".ftz"?> <s:".sat"?> <dst_t:CvtTypeInt> <src_t:CvtTypeFloat> <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::new_int_from_float_checked(
                r,
                f.is_some(),
                s.is_some(),
                dst_t,
                src_t,
                errors
            ),
            a)
    },
    // f16 <- f16
    "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f16" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    // f32 <- f16
    "cvt" <f:".ftz"?> <s:".sat"?> ".f32" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    // f64 <- f16
    "cvt" <s:".sat"?> ".f64" ".f16" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F16
            }
        ), a)
    },
    // f16 <- f32
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f16" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    // f32 <- f32
    "cvt" <r:RoundingModeInt?> <f:".ftz"?> <s:".sat"?> ".f32" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    // f64 <- f32
    // NOTE(review): this is the only production that expects `.sat` before
    // `.ftz`; every other one uses `.ftz` then `.sat`. Left as is - confirm
    // against the PTX ISA before changing the accepted order.
    "cvt" <s:".sat"?> <f:".ftz"?> ".f64" ".f32" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: None,
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F32
            }
        ), a)
    },
    // f16 <- f64
    "cvt" <r:RoundingModeFloat> <s:".sat"?> ".f16" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F16,
                src: ast::ScalarType::F64
            }
        ), a)
    },
    // f32 <- f64
    "cvt" <r:RoundingModeFloat> <f:".ftz"?> <s:".sat"?> ".f32" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: Some(r),
                // Must come from the `.ftz` capture; using the `.sat` capture
                // here would drop `.ftz` and make `.sat` imply flush-to-zero.
                flush_to_zero: Some(f.is_some()),
                saturate: s.is_some(),
                dst: ast::ScalarType::F32,
                src: ast::ScalarType::F64
            }
        ), a)
    },
    // f64 <- f64
    "cvt" <r:RoundingModeInt?> <s:".sat"?> ".f64" ".f64" <a:Arg2> => {
        ast::Instruction::Cvt(ast::CvtDetails::FloatFromFloat(
            ast::CvtDesc {
                rounding: r,
                flush_to_zero: None,
                saturate: s.is_some(),
                dst: ast::ScalarType::F64,
                src: ast::ScalarType::F64
            }
        ), a)
    },
};
// Integer types accepted by `cvt`: 8/16/32/64-bit unsigned and signed.
CvtTypeInt: ast::ScalarType = {
".u8" => ast::ScalarType::U8,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s8" => ast::ScalarType::S8,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
};
// Floating-point types accepted by the int<->float forms of `cvt`.
CvtTypeFloat: ast::ScalarType = {
".f16" => ast::ScalarType::F16,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
// `shl` with a bit type followed by three operands.
InstShl: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "shl" <typ:ShlType> <operands:Arg3> => ast::Instruction::Shl(typ, operands)
};
// Types accepted by `shl`: 16/32/64-bit untyped bit types only.
ShlType: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shr
// `shr` with its operand type followed by three operands.
InstShr: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "shr" <typ:ShrType> <operands:Arg3> => ast::Instruction::Shr(typ, operands)
};
// Types accepted by `shr`: 16/32/64-bit bit, unsigned and signed types.
ShrType: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
// Warning: NVIDIA documentation is incorrect, you can specify scope only once
// `st` with optional qualifier, state space and cache operator; missing ones
// default to Weak, Generic and Writeback respectively.
InstSt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "st" <qualifier:LdStQualifier?> <space:StStateSpace?> <cache:StCacheOperator?> <typ:LdStType> <src1:MemoryOperand> "," <src2:SrcOperandVec> => {
        let details = ast::StData {
            qualifier: qualifier.unwrap_or(ast::LdStQualifier::Weak),
            state_space: space.unwrap_or(ast::StateSpace::Generic),
            caching: cache.unwrap_or(ast::StCacheOperator::Writeback),
            typ
        };
        ast::Instruction::St(details, ast::Arg2St { src1, src2 })
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#using-addresses-arrays-and-vectors
// An operand wrapped in square brackets; yields the inner operand.
MemoryOperand: ast::Operand<&'input str> = {
    "[" <Operand> "]"
}
// State spaces that may be named explicitly on `st`.
StStateSpace: ast::StateSpace = {
".global" => ast::StateSpace::Global,
".local" => ast::StateSpace::Local,
".param" => ast::StateSpace::Param,
".shared" => ast::StateSpace::Shared,
};
// Cache operators accepted by `st`, mapped to `ast::StCacheOperator`.
StCacheOperator: ast::StCacheOperator = {
".wb" => ast::StCacheOperator::Writeback,
".cg" => ast::StCacheOperator::L2Only,
".cs" => ast::StCacheOperator::Streaming,
".wt" => ast::StCacheOperator::Writethrough,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
// `ret` with an optional `.uni` suffix; only the presence of the suffix is recorded.
InstRet: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ret" <uni:".uni"?> => {
        let uniform = uni.is_some();
        ast::Instruction::Ret(ast::RetData { uniform })
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvta
// `cvta` comes in two directions; the side that is not named explicitly is always
// `StateSpace::Generic`.
InstCvta: ast::Instruction<ast::ParsedArgParams<'input>> = {
    // `cvta.<space>`: the named space is the source, generic is the target.
    "cvta" <space:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
        let details = ast::CvtaDetails {
            to: ast::StateSpace::Generic,
            from: space,
            size,
        };
        ast::Instruction::Cvta(details, args)
    },
    // `cvta.to.<space>`: generic is the source, the named space is the target.
    "cvta" ".to" <space:CvtaStateSpace> <size:CvtaSize> <args:Arg2> => {
        let details = ast::CvtaDetails {
            to: space,
            from: ast::StateSpace::Generic,
            size,
        };
        ast::Instruction::Cvta(details, args)
    }
}
// The non-generic state spaces that may be named on `cvta` (as source) or on
// `cvta.to` (as target).
CvtaStateSpace: ast::StateSpace = {
".const" => ast::StateSpace::Const,
".global" => ast::StateSpace::Global,
".local" => ast::StateSpace::Local,
".shared" => ast::StateSpace::Shared,
}
// Size suffix of `cvta`: either `.u32` or `.u64`.
CvtaSize: ast::CvtaSize = {
".u32" => ast::CvtaSize::U32,
".u64" => ast::CvtaSize::U64,
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-call
// `call` with an optional `.uni` suffix. `ArgCall` yields the tuple
// (return parameters, callee name, call parameters).
InstCall: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "call" <uni:".uni"?> <call:ArgCall> => {
        let uniform = uni.is_some();
        ast::Instruction::Call(ast::CallInst {
            uniform,
            ret_params: call.0,
            func: call.1,
            param_list: call.2,
        })
    }
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-abs
// `abs` for signed integers and floats. `flush_to_zero` is `None` for the types that
// take no `.ftz` modifier here (integers and `.f64`) and `Some(..)` for the rest.
InstAbs: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "abs" <typ:SignedIntType> <args:Arg2> => {
        let details = ast::AbsDetails { flush_to_zero: None, typ };
        ast::Instruction::Abs(details, args)
    },
    "abs" <ftz:".ftz"?> ".f32" <args:Arg2> => {
        let details = ast::AbsDetails {
            flush_to_zero: Some(ftz.is_some()),
            typ: ast::ScalarType::F32,
        };
        ast::Instruction::Abs(details, args)
    },
    "abs" ".f64" <args:Arg2> => {
        let details = ast::AbsDetails {
            flush_to_zero: None,
            typ: ast::ScalarType::F64,
        };
        ast::Instruction::Abs(details, args)
    },
    "abs" <ftz:".ftz"?> ".f16" <args:Arg2> => {
        let details = ast::AbsDetails {
            flush_to_zero: Some(ftz.is_some()),
            typ: ast::ScalarType::F16,
        };
        ast::Instruction::Abs(details, args)
    },
    "abs" <ftz:".ftz"?> ".f16x2" <args:Arg2> => {
        let details = ast::AbsDetails {
            flush_to_zero: Some(ftz.is_some()),
            typ: ast::ScalarType::F16x2,
        };
        ast::Instruction::Abs(details, args)
    },
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mad
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mad
// `mad` reuses `MulDetails` for its modifiers and takes four operands.
InstMad: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "mad" <details:MulDetails> <args:Arg4> => ast::Instruction::Mad(details, args),
    // NOTE(review): `mad.hi.sat.s32` is not implemented. This alternative takes no
    // operands and its action panics via `todo!()` when it is reduced.
    "mad" ".hi" ".sat" ".s32" => todo!(),
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-fma
// `fma`: unlike `ArithFloat`, the modifier rule used here requires a rounding mode.
InstFma: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "fma" <details:ArithFloatMustRound> <args:Arg4> => ast::Instruction::Fma(details, args),
};
// Signed integer type suffixes of 16, 32 and 64 bits (used by the integer form of `abs`).
SignedIntType: ast::ScalarType = {
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-or
// `or` with a `BooleanType` suffix and three operands.
InstOr: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "or" <typ:BooleanType> <args:Arg3> => ast::Instruction::Or(typ, args),
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-and
// `and` with a `BooleanType` suffix and three operands.
InstAnd: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "and" <typ:BooleanType> <args:Arg3> => ast::Instruction::And(typ, args),
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp
// `rcp` for `.f32` (approximate or rounded, optional `.ftz`) and `.f64` (rounded only).
InstRcp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "rcp" <rounding:RcpRoundingMode> <ftz:".ftz"?> ".f32" <args:Arg2> => {
        ast::Instruction::Rcp(
            ast::RcpDetails {
                rounding,
                flush_to_zero: Some(ftz.is_some()),
                is_f64: false,
            },
            args,
        )
    },
    "rcp" <mode:RoundingModeFloat> ".f64" <args:Arg2> => {
        ast::Instruction::Rcp(
            ast::RcpDetails {
                rounding: Some(mode),
                flush_to_zero: None,
                is_f64: true,
            },
            args,
        )
    }
};
// Rounding selector of `rcp.f32`: `.approx` is represented as "no rounding mode".
RcpRoundingMode: Option<ast::RoundingMode> = {
    ".approx" => None,
    <RoundingModeFloat> => Some(<>)
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-sub
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sub
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-sub
// `sub`: modifiers and type come from the shared `ArithDetails` rule.
InstSub: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "sub" <details:ArithDetails> <args:Arg3> => ast::Instruction::Sub(details, args),
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-min
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-min
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-min
// `min`: modifiers and type come from `MinMaxDetails`, shared with `max`.
InstMin: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "min" <details:MinMaxDetails> <args:Arg3> => ast::Instruction::Min(details, args),
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-max
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-max
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-max
// `max`: modifiers and type come from `MinMaxDetails`, shared with `min`.
InstMax: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "max" <details:MinMaxDetails> <args:Arg3> => ast::Instruction::Max(details, args),
};
// Modifier/type suffix shared by `min` and `max`. Integer types carry no extra flags;
// `.f32`, `.f16` and `.f16x2` accept optional `.ftz` and `.NaN`; `.f64` accepts neither.
MinMaxDetails: ast::MinMaxDetails = {
    <typ:UIntType> => ast::MinMaxDetails::Unsigned(typ),
    <typ:SIntType> => ast::MinMaxDetails::Signed(typ),
    <ftz:".ftz"?> <nan:".NaN"?> ".f32" => {
        let float = ast::MinMaxFloat {
            flush_to_zero: Some(ftz.is_some()),
            nan: nan.is_some(),
            typ: ast::ScalarType::F32,
        };
        ast::MinMaxDetails::Float(float)
    },
    ".f64" => {
        let float = ast::MinMaxFloat {
            flush_to_zero: None,
            nan: false,
            typ: ast::ScalarType::F64,
        };
        ast::MinMaxDetails::Float(float)
    },
    <ftz:".ftz"?> <nan:".NaN"?> ".f16" => {
        let float = ast::MinMaxFloat {
            flush_to_zero: Some(ftz.is_some()),
            nan: nan.is_some(),
            typ: ast::ScalarType::F16,
        };
        ast::MinMaxDetails::Float(float)
    },
    <ftz:".ftz"?> <nan:".NaN"?> ".f16x2" => {
        let float = ast::MinMaxFloat {
            flush_to_zero: Some(ftz.is_some()),
            nan: nan.is_some(),
            typ: ast::ScalarType::F16x2,
        };
        ast::MinMaxDetails::Float(float)
    }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp
// `selp` with a type suffix and four operands.
InstSelp: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "selp" <typ:SelpType> <args:Arg4> => ast::Instruction::Selp(typ, args),
};
// Type suffixes accepted after `selp`: 16/32/64-bit `.b`, `.u`, `.s` plus `.f32` and `.f64`.
SelpType: ast::ScalarType = {
".b16" => ast::ScalarType::B16,
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
".u16" => ast::ScalarType::U16,
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f32" => ast::ScalarType::F32,
".f64" => ast::ScalarType::F64,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar
// Barrier synchronisation. All three accepted spellings are recorded as
// `BarDetails::SyncAligned`, including `barrier.sync` without an explicit `.aligned`.
InstBar: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bar" ".sync" <barrier:Arg1Bar> => {
        ast::Instruction::Bar(ast::BarDetails::SyncAligned, barrier)
    },
    "barrier" ".sync" <barrier:Arg1Bar> => {
        ast::Instruction::Bar(ast::BarDetails::SyncAligned, barrier)
    },
    "barrier" ".sync" ".aligned" <barrier:Arg1Bar> => {
        ast::Instruction::Bar(ast::BarDetails::SyncAligned, barrier)
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-atom
// The documentation does not mention all supported operations:
// * Operation .add requires .u32 or .s32 or .u64 or .f64 or .f16 or .f16x2 or .f32
// * Operation .inc requires .u32 type for instruction
// * Operation .dec requires .u32 type for instruction
// Otherwise as documented
// `atom` (everything except `.cas`). Semantics, scope and state space are optional and
// default to `Relaxed`, `Gpu` and `Generic`. The alternatives differ only in which
// operation/type pair they accept and which `AtomInnerDetails` variant they build.
InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = {
    // Bit operations on `.b32`/`.b64`.
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op:AtomBitOp> <typ:BitType> <args:Arg3Atom> => {
        let inner = ast::AtomInnerDetails::Bit { op, typ };
        ast::Instruction::Atom(
            ast::AtomDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                inner,
            },
            args,
        )
    },
    // `.inc` is only accepted with `.u32`.
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".inc" ".u32" <args:Arg3Atom> => {
        let inner = ast::AtomInnerDetails::Unsigned {
            op: ast::AtomUIntOp::Inc,
            typ: ast::ScalarType::U32,
        };
        ast::Instruction::Atom(
            ast::AtomDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                inner,
            },
            args,
        )
    },
    // `.dec` is only accepted with `.u32`.
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".dec" ".u32" <args:Arg3Atom> => {
        let inner = ast::AtomInnerDetails::Unsigned {
            op: ast::AtomUIntOp::Dec,
            typ: ast::ScalarType::U32,
        };
        ast::Instruction::Atom(
            ast::AtomDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                inner,
            },
            args,
        )
    },
    // Floating point `.add`.
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".add" <typ:FloatType> <args:Arg3Atom> => {
        let inner = ast::AtomInnerDetails::Float {
            op: ast::AtomFloatOp::Add,
            typ,
        };
        ast::Instruction::Atom(
            ast::AtomDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                inner,
            },
            args,
        )
    },
    // Unsigned `.add`/`.min`/`.max` on `.u32`/`.u64`.
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomUIntOp> <typ:UIntType3264> <args:Arg3Atom> => {
        let inner = ast::AtomInnerDetails::Unsigned { op, typ };
        ast::Instruction::Atom(
            ast::AtomDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                inner,
            },
            args,
        )
    },
    // Signed `.add`/`.min`/`.max` on `.s32`/`.s64`.
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomSIntOp> <typ:SIntType3264> <args:Arg3Atom> => {
        let inner = ast::AtomInnerDetails::Signed { op, typ };
        ast::Instruction::Atom(
            ast::AtomDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                inner,
            },
            args,
        )
    }
}
// `atom.cas`: same optional modifiers and defaults as the other `atom` forms, but it
// takes four operands and produces its own instruction variant.
InstAtomCas: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "atom" <semantics:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".cas" <typ:BitType> <args:Arg4Atom> => {
        ast::Instruction::AtomCas(
            ast::AtomCasDetails {
                semantics: semantics.unwrap_or(ast::AtomSemantics::Relaxed),
                scope: scope.unwrap_or(ast::MemScope::Gpu),
                space: space.unwrap_or(ast::StateSpace::Generic),
                typ,
            },
            args,
        )
    },
}
// Memory-ordering suffixes accepted on `atom`. The `atom` rules use `Relaxed` when
// the suffix is absent.
AtomSemantics: ast::AtomSemantics = {
".relaxed" => ast::AtomSemantics::Relaxed,
".acquire" => ast::AtomSemantics::Acquire,
".release" => ast::AtomSemantics::Release,
".acq_rel" => ast::AtomSemantics::AcquireRelease
}
// State spaces that may be named on `atom`. The `atom` rules use the generic state
// space when the suffix is absent.
AtomSpace: ast::StateSpace = {
".global" => ast::StateSpace::Global,
".shared" => ast::StateSpace::Shared
}
// `atom` operations that are paired with a `BitType` (`.b32`/`.b64`) operand type.
AtomBitOp: ast::AtomBitOp = {
".and" => ast::AtomBitOp::And,
".or" => ast::AtomBitOp::Or,
".xor" => ast::AtomBitOp::Xor,
".exch" => ast::AtomBitOp::Exchange,
}
// `atom` operations paired with `.u32`/`.u64`. `.inc` and `.dec` are not listed here;
// `InstAtom` matches them as literal tokens together with `.u32`.
AtomUIntOp: ast::AtomUIntOp = {
".add" => ast::AtomUIntOp::Add,
".min" => ast::AtomUIntOp::Min,
".max" => ast::AtomUIntOp::Max,
}
// `atom` operations paired with `.s32`/`.s64`.
AtomSIntOp: ast::AtomSIntOp = {
".add" => ast::AtomSIntOp::Add,
".min" => ast::AtomSIntOp::Min,
".max" => ast::AtomSIntOp::Max,
}
// 32- and 64-bit `.b` type suffixes.
BitType: ast::ScalarType = {
".b32" => ast::ScalarType::B32,
".b64" => ast::ScalarType::B64,
}
// 32- and 64-bit unsigned integer type suffixes.
UIntType3264: ast::ScalarType = {
".u32" => ast::ScalarType::U32,
".u64" => ast::ScalarType::U64,
}
// 32- and 64-bit signed integer type suffixes.
SIntType3264: ast::ScalarType = {
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-div
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-div
// `div` for unsigned integers, signed integers, `.f32` and `.f64`.
InstDiv: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "div" <typ:UIntType> <args:Arg3> => {
        ast::Instruction::Div(ast::DivDetails::Unsigned(typ), args)
    },
    "div" <typ:SIntType> <args:Arg3> => {
        ast::Instruction::Div(ast::DivDetails::Signed(typ), args)
    },
    // `.f32`: approximate, full-range or rounded, each with optional `.ftz`.
    "div" <kind:DivFloatKind> <ftz:".ftz"?> ".f32" <args:Arg3> => {
        let float = ast::DivFloatDetails {
            typ: ast::ScalarType::F32,
            flush_to_zero: Some(ftz.is_some()),
            kind,
        };
        ast::Instruction::Div(ast::DivDetails::Float(float), args)
    },
    // `.f64`: a rounding mode is mandatory and `.ftz` is not accepted.
    "div" <mode:RoundingModeFloat> ".f64" <args:Arg3> => {
        let float = ast::DivFloatDetails {
            typ: ast::ScalarType::F64,
            flush_to_zero: None,
            kind: ast::DivFloatKind::Rounding(mode),
        };
        ast::Instruction::Div(ast::DivDetails::Float(float), args)
    },
}
// Flavour of `div.f32`: `.approx`, `.full`, or an explicit rounding mode.
DivFloatKind: ast::DivFloatKind = {
    ".approx" => ast::DivFloatKind::Approx,
    ".full" => ast::DivFloatKind::Full,
    <RoundingModeFloat> => ast::DivFloatKind::Rounding(<>),
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sqrt
// `sqrt`: `.approx` exists only for `.f32`; `.f64` requires a rounding mode and takes
// no `.ftz`.
InstSqrt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "sqrt" ".approx" <ftz:".ftz"?> ".f32" <args:Arg2> => {
        ast::Instruction::Sqrt(
            ast::SqrtDetails {
                typ: ast::ScalarType::F32,
                flush_to_zero: Some(ftz.is_some()),
                kind: ast::SqrtKind::Approx,
            },
            args,
        )
    },
    "sqrt" <mode:RoundingModeFloat> <ftz:".ftz"?> ".f32" <args:Arg2> => {
        ast::Instruction::Sqrt(
            ast::SqrtDetails {
                typ: ast::ScalarType::F32,
                flush_to_zero: Some(ftz.is_some()),
                kind: ast::SqrtKind::Rounding(mode),
            },
            args,
        )
    },
    "sqrt" <mode:RoundingModeFloat> ".f64" <args:Arg2> => {
        ast::Instruction::Sqrt(
            ast::SqrtDetails {
                typ: ast::ScalarType::F64,
                flush_to_zero: None,
                kind: ast::SqrtKind::Rounding(mode),
            },
            args,
        )
    }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt-approx-ftz-f64
// `rsqrt.approx` for `.f32` and `.f64`; both accept an optional `.ftz`.
InstRsqrt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "rsqrt" ".approx" <ftz:".ftz"?> ".f32" <args:Arg2> => {
        ast::Instruction::Rsqrt(
            ast::RsqrtDetails {
                typ: ast::ScalarType::F32,
                flush_to_zero: ftz.is_some(),
            },
            args,
        )
    },
    "rsqrt" ".approx" <ftz:".ftz"?> ".f64" <args:Arg2> => {
        ast::Instruction::Rsqrt(
            ast::RsqrtDetails {
                typ: ast::ScalarType::F64,
                flush_to_zero: ftz.is_some(),
            },
            args,
        )
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-neg
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-neg
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-neg
// `neg`: types from `NegTypeFtz` may carry `.ftz` (recorded as `Some(..)`), types from
// `NegTypeNonFtz` may not (recorded as `None`).
InstNeg: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "neg" <ftz:".ftz"?> <typ:NegTypeFtz> <args:Arg2> => {
        let flush_to_zero = Some(ftz.is_some());
        ast::Instruction::Neg(ast::NegDetails { typ, flush_to_zero }, args)
    },
    "neg" <typ:NegTypeNonFtz> <args:Arg2> => {
        let flush_to_zero = None;
        ast::Instruction::Neg(ast::NegDetails { typ, flush_to_zero }, args)
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sin
// `sin.approx{.ftz}.f32`.
InstSin: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "sin" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        let flush_to_zero = ftz.is_some();
        ast::Instruction::Sin { flush_to_zero, arg }
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-cos
// `cos.approx{.ftz}.f32`.
InstCos: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "cos" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        let flush_to_zero = ftz.is_some();
        ast::Instruction::Cos { flush_to_zero, arg }
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-lg2
// `lg2.approx{.ftz}.f32`.
InstLg2: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "lg2" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        let flush_to_zero = ftz.is_some();
        ast::Instruction::Lg2 { flush_to_zero, arg }
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-ex2
// `ex2.approx{.ftz}.f32`.
InstEx2: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "ex2" ".approx" <ftz:".ftz"?> ".f32" <arg:Arg2> => {
        let flush_to_zero = ftz.is_some();
        ast::Instruction::Ex2 { flush_to_zero, arg }
    },
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-clz
// `clz` on `.b32`/`.b64` with two operands.
InstClz: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "clz" <typ:BitType> <arg:Arg2> => ast::Instruction::Clz { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-brev
// `brev` on `.b32`/`.b64` with two operands.
InstBrev: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "brev" <typ:BitType> <arg:Arg2> => ast::Instruction::Brev { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-popc
// `popc` on `.b32`/`.b64` with two operands.
InstPopc: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "popc" <typ:BitType> <arg:Arg2> => ast::Instruction::Popc { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-xor
// `xor` with a `BooleanType` suffix and three operands.
InstXor: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "xor" <typ:BooleanType> <arg:Arg3> => ast::Instruction::Xor { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfe
// `bfe` with an `IntType3264` suffix and four operands.
InstBfe: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bfe" <typ:IntType3264> <arg:Arg4> => ast::Instruction::Bfe { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-bfi
// `bfi` on `.b32`/`.b64` with five operands.
InstBfi: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "bfi" <typ:BitType> <arg:Arg5> => ast::Instruction::Bfi { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-prmt
// `prmt.b32`: three regular operands followed by a 16-bit numeric literal that is
// stored separately as `control`.
InstPrmt: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "prmt" ".b32" <arg:Arg3> "," <control:U16Num> => ast::Instruction::Prmt { arg, control }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-rem
// `rem` with an `IntType` suffix and three operands.
InstRem: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "rem" <typ:IntType> <arg:Arg3> => ast::Instruction::Rem { typ, arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-activemask
// `activemask.b32` with a single operand.
InstActivemask: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "activemask" ".b32" <arg:Arg1> => ast::Instruction::Activemask { arg }
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar
// `membar` followed by its level suffix; it takes no operands.
InstMembar: ast::Instruction<ast::ParsedArgParams<'input>> = {
    "membar" <level:MembarLevel> => ast::Instruction::Membar { level }
}
// `neg` operand types that may be preceded by `.ftz` (see `InstNeg`).
NegTypeFtz: ast::ScalarType = {
".f16" => ast::ScalarType::F16,
".f16x2" => ast::ScalarType::F16x2,
".f32" => ast::ScalarType::F32,
}
// `neg` operand types for which `InstNeg` does not accept `.ftz`.
NegTypeNonFtz: ast::ScalarType = {
".s16" => ast::ScalarType::S16,
".s32" => ast::ScalarType::S32,
".s64" => ast::ScalarType::S64,
".f64" => ast::ScalarType::F64
}
// Type/modifier suffix for simple arithmetic (used by `sub` in this part of the file).
// `.sat` on an integer is only accepted together with `.s32`.
ArithDetails: ast::ArithDetails = {
    <typ:UIntType> => ast::ArithDetails::Unsigned(typ),
    <typ:SIntType> => {
        let signed = ast::ArithSInt { typ, saturate: false };
        ast::ArithDetails::Signed(signed)
    },
    ".sat" ".s32" => {
        let signed = ast::ArithSInt { typ: ast::ScalarType::S32, saturate: true };
        ast::ArithDetails::Signed(signed)
    },
    <float:ArithFloat> => ast::ArithDetails::Float(float)
}
// Floating point arithmetic suffix with an optional rounding mode.
// `.f32` takes any float rounding mode plus `.ftz`/`.sat`; `.f64` takes only a rounding
// mode; `.f16` and `.f16x2` take only `.rn` plus `.ftz`/`.sat`.
ArithFloat: ast::ArithFloat = {
    <rounding:RoundingModeFloat?> <ftz:".ftz"?> <sat:".sat"?> ".f32" => {
        let flush_to_zero = Some(ftz.is_some());
        let saturate = sat.is_some();
        ast::ArithFloat { typ: ast::ScalarType::F32, rounding, flush_to_zero, saturate }
    },
    <rounding:RoundingModeFloat?> ".f64" => {
        ast::ArithFloat {
            typ: ast::ScalarType::F64,
            rounding,
            flush_to_zero: None,
            saturate: false,
        }
    },
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16" => {
        let rounding = rn.and(Some(ast::RoundingMode::NearestEven));
        let flush_to_zero = Some(ftz.is_some());
        let saturate = sat.is_some();
        ast::ArithFloat { typ: ast::ScalarType::F16, rounding, flush_to_zero, saturate }
    },
    <rn:".rn"?> <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => {
        let rounding = rn.and(Some(ast::RoundingMode::NearestEven));
        let flush_to_zero = Some(ftz.is_some());
        let saturate = sat.is_some();
        ast::ArithFloat { typ: ast::ScalarType::F16x2, rounding, flush_to_zero, saturate }
    },
}
// Same shape as `ArithFloat`, but the rounding mode is mandatory (used by `fma`).
// For `.f16`/`.f16x2` the only accepted rounding mode is `.rn`.
ArithFloatMustRound: ast::ArithFloat = {
    <mode:RoundingModeFloat> <ftz:".ftz"?> <sat:".sat"?> ".f32" => {
        let flush_to_zero = Some(ftz.is_some());
        let saturate = sat.is_some();
        ast::ArithFloat {
            typ: ast::ScalarType::F32,
            rounding: Some(mode),
            flush_to_zero,
            saturate,
        }
    },
    <mode:RoundingModeFloat> ".f64" => {
        ast::ArithFloat {
            typ: ast::ScalarType::F64,
            rounding: Some(mode),
            flush_to_zero: None,
            saturate: false,
        }
    },
    ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16" => {
        let flush_to_zero = Some(ftz.is_some());
        let saturate = sat.is_some();
        ast::ArithFloat {
            typ: ast::ScalarType::F16,
            rounding: Some(ast::RoundingMode::NearestEven),
            flush_to_zero,
            saturate,
        }
    },
    ".rn" <ftz:".ftz"?> <sat:".sat"?> ".f16x2" => {
        let flush_to_zero = Some(ftz.is_some());
        let saturate = sat.is_some();
        ast::ArithFloat {
            typ: ast::ScalarType::F16x2,
            rounding: Some(ast::RoundingMode::NearestEven),
            flush_to_zero,
            saturate,
        }
    },
}
// Plain operand: an identifier, an identifier plus a signed 32-bit offset, or an immediate.
Operand: ast::Operand<&'input str> = {
    <name:ExtendedID> => ast::Operand::Reg(name),
    <name:ExtendedID> "+" <disp:S32Num> => ast::Operand::RegOffset(name, disp),
    <imm:ImmediateValue> => ast::Operand::Imm(imm)
};
// Operand form allowed in a `call` parameter list: an identifier or an immediate
// (no `+offset` form).
CallOperand: ast::Operand<&'input str> = {
    <name:ExtendedID> => ast::Operand::Reg(name),
    <imm:ImmediateValue> => ast::Operand::Imm(imm)
};
// TODO: start parsing whole constants sub-language:
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
// Immediate operand: an optionally negated integer literal or a float literal.
//
// Integer literals arrive from `NumToken` as (digits, radix, has-unsigned-suffix):
// * negated literals always become `S64`;
// * literals with an unsigned suffix become `U64`;
// * all other literals become `S64` when they fit and `U64` otherwise.
// Anything that fits in neither is reported as a user parse error.
ImmediateValue: ast::ImmediateValue = {
    // TODO: treat negation correctly
    <neg:"-"?> <x:NumToken> =>? {
        let (num, radix, is_unsigned) = x;
        if neg.is_some() {
            match i64::from_str_radix(num, radix) {
                Ok(x) => Ok(ast::ImmediateValue::S64(-x)),
                Err(err) => match u64::from_str_radix(num, radix) {
                    // A magnitude of 2^63 does not fit in a positive i64, but its
                    // negation is exactly i64::MIN and is representable.
                    Ok(magnitude) if magnitude == 1u64 << 63 => {
                        Ok(ast::ImmediateValue::S64(i64::MIN))
                    }
                    // Otherwise report the original signed parse failure.
                    _ => Err(ParseError::User { error: ast::PtxError::from(err) })
                }
            }
        } else if is_unsigned {
            match u64::from_str_radix(num, radix) {
                Ok(x) => Ok(ast::ImmediateValue::U64(x)),
                Err(err) => Err(ParseError::User { error: ast::PtxError::from(err) })
            }
        } else {
            match i64::from_str_radix(num, radix) {
                Ok(x) => Ok(ast::ImmediateValue::S64(x)),
                // Too large for i64: retry as u64 before giving up.
                Err(_) => {
                    match u64::from_str_radix(num, radix) {
                        Ok(x) => Ok(ast::ImmediateValue::U64(x)),
                        Err(err) => Err(ParseError::User { error: ast::PtxError::from(err) })
                    }
                }
            }
        }
    },
    <f:F32Num> => {
        ast::ImmediateValue::F32(f)
    },
    <f:F64Num> => {
        ast::ImmediateValue::F64(f)
    }
}
// Single-operand argument list: one identifier.
Arg1: ast::Arg1<ast::ParsedArgParams<'input>> = {
    <src:ExtendedID> => ast::Arg1 { src }
};
// Single-operand argument list for barriers: unlike `Arg1` it accepts any `Operand`.
Arg1Bar: ast::Arg1Bar<ast::ParsedArgParams<'input>> = {
    <src:Operand> => ast::Arg1Bar { src }
};
// Destination followed by one source operand.
Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src:Operand> => ast::Arg2 { dst, src }
};
// Vector member access, yielding (vector name, member index). Two token shapes are
// handled: `name` `.` `member` as separate tokens, and `name` followed by a single
// `DotID` token whose leading dot is stripped before `vector_index` is applied.
MemberOperand: (&'input str, u8) = {
    <vector:ExtendedID> "." <member:ExtendedID> =>? {
        Ok((vector, vector_index(member)?))
    },
    <vector:ExtendedID> <member:DotID> =>? {
        Ok((vector, vector_index(&member[1..])?))
    }
};
// Brace-enclosed list of exactly two or exactly four identifiers, returned in
// source order.
VectorExtract: Vec<&'input str> = {
    "{" <a:ExtendedID> "," <b:ExtendedID> "}" => {
        vec![a, b]
    },
    "{" <a:ExtendedID> "," <b:ExtendedID> "," <c:ExtendedID> "," <d:ExtendedID> "}" => {
        vec![a, b, c, d]
    },
};
// Destination followed by two source operands.
Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> => ast::Arg3 { dst, src1, src2 }
};
// Like `Arg3`, but the first source operand must be written inside square brackets.
Arg3Atom: ast::Arg3<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> => ast::Arg3 { dst, src1, src2 }
};
// Destination followed by three source operands.
Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => {
        ast::Arg4 { dst, src1, src2, src3 }
    }
};
// Like `Arg4`, but the first source operand must be written inside square brackets.
Arg4Atom: ast::Arg4<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," "[" <src1:Operand> "]" "," <src2:Operand> "," <src3:Operand> => {
        ast::Arg4 { dst, src1, src2, src3 }
    }
};
// `setp`-style arguments: a destination, an optional second destination written as
// `|name`, and two source operands.
Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => {
        ast::Arg4Setp { dst1, dst2, src1, src2 }
    }
};
// Destination followed by four source operands.
Arg5: ast::Arg5<ast::ParsedArgParams<'input>> = {
    <dst:DstOperand> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> "," <src4:Operand> => {
        ast::Arg5 { dst, src1, src2, src3, src4 }
    }
};
// TODO: pass src3 negation somewhere
// Like `Arg4Setp` with a third source operand. An optional `!` in front of that
// operand is accepted by the grammar but is not captured, so it is dropped here.
Arg5Setp: ast::Arg5Setp<ast::ParsedArgParams<'input>> = {
    <dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> => {
        ast::Arg5Setp { dst1, dst2, src1, src2, src3 }
    }
};
// Argument part of `call`, yielding (return parameters, callee, call parameters).
// Both the parenthesised return list and the parenthesised parameter list are
// optional; a missing list is returned as an empty vector.
ArgCall: (Vec<&'input str>, &'input str, Vec<ast::Operand<&'input str>>) = {
    "(" <rets:Comma<ExtendedID>> ")" "," <callee:ExtendedID> "," "(" <params:Comma<CallOperand>> ")" => {
        (rets, callee, params)
    },
    "(" <rets:Comma<ExtendedID>> ")" "," <callee:ExtendedID> => {
        (rets, callee, Vec::new())
    },
    <callee:ExtendedID> "," "(" <params:Comma<CallOperand>> ")" => {
        (Vec::new(), callee, params)
    },
    <callee:ExtendedID> => {
        (Vec::new(), callee, Vec::new())
    },
};
// Second destination of a `setp`-style argument list, written as `|name`; only the
// name is returned.
OptionalDst: &'input str = {
    "|" <ExtendedID>
}
// Source operand: everything `Operand` accepts plus a vector member access.
SrcOperand: ast::Operand<&'input str> = {
    <name:ExtendedID> => ast::Operand::Reg(name),
    <name:ExtendedID> "+" <disp:S32Num> => ast::Operand::RegOffset(name, disp),
    <imm:ImmediateValue> => ast::Operand::Imm(imm),
    <member:MemberOperand> => ast::Operand::VecMember(member.0, member.1)
}
// Source operand that may also be a brace-enclosed pack of identifiers.
SrcOperandVec: ast::Operand<&'input str> = {
    <SrcOperand>,
    <pack:VectorExtract> => ast::Operand::VecPack(pack),
}
// Destination operand: an identifier or a vector member access.
DstOperand: ast::Operand<&'input str> = {
    <name:ExtendedID> => ast::Operand::Reg(name),
    <member:MemberOperand> => ast::Operand::VecMember(member.0, member.1)
}
// Destination operand that may also be a brace-enclosed pack of identifiers.
DstOperandVec: ast::Operand<&'input str> = {
    <DstOperand>,
    <pack:VectorExtract> => ast::Operand::VecPack(pack),
}
// Vector-length prefix: `.v2` and `.v4` are mapped to the numbers 2 and 4.
VectorPrefix: u8 = {
".v2" => 2,
".v4" => 4
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
// `.file` directive: a number, a string, and optionally two further comma-separated
// numbers. There is no action code, so nothing is converted into an AST node here.
File = {
".file" U32Num String ("," U32Num "," U32Num)?
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
// `.section` directive: a dotted section name followed by a brace-enclosed list of
// data lines. There is no action code, so nothing is converted into an AST node here.
Section = {
".section" DotID "{" SectionDwarfLines* "}"
};
// One line inside a `.section` block: either a bit type followed by a list of numbers,
// or `.b32`/`.b64` followed by a label with an optional `+ number` offset.
// The rule is declared to produce `()`, i.e. the contents are only checked syntactically.
SectionDwarfLines: () = {
AnyBitType Comma<U32Num>,
".b32" SectionLabel,
".b64" SectionLabel,
".b32" SectionLabel "+" U32Num,
".b64" SectionLabel "+" U32Num,
};
// Label referenced from a `.section` data line: a plain or a dot-prefixed identifier.
SectionLabel = {
ID,
DotID
};
// Any of the `.b` type tokens (8, 16, 32 or 64 bits).
AnyBitType = {
".b8", ".b16", ".b32", ".b64"
};
// Scalar variable declaration parameterised over the type rule `T`:
// optional alignment, type, name. Yields (alignment, type, name).
VariableScalar<T>: (Option<u32>, T, &'input str) = {
    <alignment:Align?> <typ:T> <ident:ExtendedID> => {
        let decl = (alignment, typ, ident);
        decl
    }
}
// Vector variable declaration parameterised over the type rule `T`:
// optional alignment, `.v2`/`.v4` prefix, element type, name.
// Yields (alignment, vector length, element type, name).
VariableVector<T>: (Option<u32>, u8, T, &'input str) = {
    <alignment:Align?> <len:VectorPrefix> <typ:T> <ident:ExtendedID> => {
        let decl = (alignment, len, typ, ident);
        decl
    }
}
// empty dimensions [0] means it's a pointer
// Array (or pointer) variable declaration with an optional initializer.
// * With an initializer, the result is always an array; `to_vec` receives the
//   dimensions mutably and may therefore adjust them.
// * Without an initializer, a zero dimension is rejected when there is more than one
//   dimension, a lone `[0]`/`[]` denotes a pointer, and anything else is an
//   uninitialized array.
VariableArrayOrPointer<T>: (Option<u32>, T, &'input str, ast::ArrayOrPointer) = {
    <align:Align?> <typ:SizedScalarType> <name:ExtendedID> <dims:ArrayDimensions> <init:ArrayInitializer?> =>? {
        let mut dimensions = dims;
        let kind = if let Some(initializer) = init {
            let init = initializer.to_vec(typ, &mut dimensions)?;
            ast::ArrayOrPointer::Array { dimensions, init }
        } else if dimensions.len() > 1 && dimensions.contains(&0) {
            return Err(ParseError::User { error: ast::PtxError::ZeroDimensionArray });
        } else if dimensions == [0] {
            ast::ArrayOrPointer::Pointer
        } else {
            ast::ArrayOrPointer::Array { dimensions, init: Vec::new() }
        };
        Ok((align, typ, name, kind))
    }
}
// [0] and [] are treated the same
// List of array dimensions. Only the first dimension may be written as an empty `[]`,
// in which case it is recorded as 0.
ArrayDimensions: Vec<u32> = {
    ArrayEmptyDimension => vec![0u32],
    ArrayEmptyDimension <rest:ArrayDimension+> => {
        let mut all = Vec::with_capacity(rest.len() + 1);
        all.push(0u32);
        all.extend(rest);
        all
    },
    <sized:ArrayDimension+> => sized
}
// An array dimension written without a size: `[]`.
ArrayEmptyDimension = {
"[" "]"
}
// An array dimension with an explicit size: `[n]`; yields `n`.
ArrayDimension: u32 = {
    "[" <U32Num> "]",
}
// `= { ... }` initializer of an array variable; yields the bracketed contents.
ArrayInitializer: ast::NumsOrArrays<'input> = {
    "=" <NumsOrArraysBracket>
}
// One brace-enclosed level of an array initializer; the braces are dropped.
NumsOrArraysBracket: ast::NumsOrArrays<'input> = {
    "{" <NumsOrArrays> "}"
}
// Contents of one initializer level: either nested brace-enclosed levels (possibly
// none) or a non-empty list of numeric literals. For literals only the text and the
// radix are kept; the unsigned-suffix flag is discarded.
NumsOrArrays: ast::NumsOrArrays<'input> = {
    <nested:Comma<NumsOrArraysBracket>> => ast::NumsOrArrays::Arrays(nested),
    <literals:CommaNonEmpty<NumToken>> => {
        let nums = literals
            .into_iter()
            .map(|(text, radix, _unsigned)| (text, radix))
            .collect();
        ast::NumsOrArrays::Nums(nums)
    },
}
// Comma-separated list of `T`, possibly empty; a trailing comma is accepted.
Comma<T>: Vec<T> = {
    <items:(<T> ",")*> <last:T?> => {
        let mut items = items;
        // `Option` is iterable: this appends the final element only when present.
        items.extend(last);
        items
    }
};
// Comma-separated list of `T` with at least one element and no trailing comma.
CommaNonEmpty<T>: Vec<T> = {
    <init:(<T> ",")*> <last:T> => {
        let mut items = init;
        items.push(last);
        items
    }
};
// Alternation helper: matches either `T1` or `T2`. The result is typed as `T1`, so
// both rules are expected to produce the same type.
#[inline]
Or<T1, T2>: T1 = {
T1,
T2
}
// Alternation helper: matches `T1`, `T2` or `T3`. The result is typed as `T1`, so all
// three rules are expected to produce the same type.
#[inline]
Or3<T1, T2, T3>: T1 = {
T1,
T2,
T3
}