// ZLUDA/ptx/src/ptx.lalrpop

use crate::ast;
use crate::ast::UnwrapWithVec;
use crate::without_none;

// Grammar header. Declares a lifetime parameter and an `errors` vector that is
// visible to all action code below; actions hand it recoverable problems such
// as numeric literals that fail to parse.
grammar<'a>(errors: &mut Vec<ast::PtxError>);

// Lexer configuration. Entries in an earlier group take precedence over
// entries in a later `else` group when both match the same text.
match {
    // Whitespace, line comments and block comments produce no token.
    r"\s+" => { },
    r"//[^\n\r]*[\n\r]*" => { },
    r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { },
    r"sm_[0-9]+" => ShaderModel,
    // Optional sign, optional `0x` prefix, then digits. The prefix has to be a
    // non-capturing group: spelled as the character class `[?:0x]` it accepted
    // any single one of `?`, `:`, `0`, `x`, so identifier-like text such as
    // `x1` (and a `:` directly followed by digits) was lexed as a Num, which
    // outranks the identifier regex.
    // NOTE(review): digits after `0x` are still limited to 0-9 and the actions
    // below parse the token text as decimal; real hexadecimal support needs a
    // follow-up.
    r"-?(?:0x)?[0-9]+" => Num
} else {
    // An identifier immediately followed by `<N>`.
    r"(?:[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+)<[0-9]+>" => ParametrizedID,
} else {
    // Every other terminal used in the grammar.
    _
}

// Entry point: a PTX module is a `.version` line, a `.target` line and then
// any number of directives. The target line is parsed but not stored.
// Directives that yield no AST node arrive as None and are passed through
// `without_none` (presumably dropping them) before being stored.
pub Module: ast::Module<'input> = {
    <version:Version> Target <directives:Directive*> => {
        let functions = without_none(directives);
        ast::Module { version: version, functions: functions }
    }
};

// `.version major.minor`, returned as a (major, minor) pair of u8.
// The VersionNumber token always contains a dot, so splitting on the first
// dot cannot fail. Components that do not fit in a u8 are passed, still as
// Results, to `unwrap_with` together with the shared `errors` list.
Version: (u8, u8) = {
    ".version" <text:VersionNumber> => {
        let mut pieces = text.splitn(2, '.');
        let major = pieces.next().unwrap().parse::<u8>();
        let minor = pieces.next().unwrap().parse::<u8>();
        (major, minor).unwrap_with(errors)
    }
}

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#ptx-module-directives-target
// `.target` followed by a comma-separated list of target specifiers. Module
// matches this rule without binding its value, so nothing is recorded.
Target = {
    ".target" Comma<TargetSpecifier>
};

// One entry of the `.target` list: a shader model token (`sm_` followed by
// digits) or one of the fixed option keywords.
TargetSpecifier = {
    ShaderModel,
    "texmode_unified",
    "texmode_independent",
    "debug",
    "map_f64_to_f32"
};

// A top-level directive. Only function definitions produce an AST node; the
// address-size, file and section directives are recognised and mapped to None.
Directive: Option<ast::Function<'input>> = {
    AddressSize => None,
    Function => Some(<>),
    File => None,
    Section => None
};

// `.address_size N`. Directive maps this rule to None, so the number is
// parsed but not kept.
AddressSize = {
    ".address_size" Num
};

// A function definition: optional linkage keywords, the `.entry`/`.func`
// kind, the name, a parenthesised comma-separated parameter list and a braced
// body. Linkage keywords are accepted but not recorded in the AST node.
Function: ast::Function<'input> = {
    LinkingDirective*
    <kernel:IsKernel>
    <name:ID>
    "(" <args:Comma<FunctionInput>> ")"
    <body:FunctionBody>
    => ast::Function { kernel: kernel, name: name, args: args, body: body }
};

// Linkage keywords that may precede `.entry`/`.func` in a Function.
LinkingDirective = {
    ".extern",
    ".visible",
    ".weak"
};

// Function kind keyword: true for `.entry`, false for `.func`. The value ends
// up in the `kernel` field of ast::Function.
IsKernel: bool = {
    ".entry" => true,
    ".func" => false
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parameter-state-space
// A `.param` declaration inside a function signature. A plain declaration is
// recorded with length 1; the bracketed form takes its length from the number
// between the brackets, parsed as u32 and passed through `unwrap_with` with
// the shared `errors` list.
FunctionInput: ast::Argument<'input> = {
    ".param" <a_type:ScalarType> <name:ID> =>
        ast::Argument { a_type: a_type, name: name, length: 1 },
    ".param" <a_type:ScalarType> <name:ID> "[" <count:Num> "]" => {
        let length = count.parse::<u32>().unwrap_with(errors);
        ast::Argument { a_type: a_type, name: name, length: length }
    }
};

// A braced sequence of statements. Statements that yield None (see Statement)
// are passed through `without_none` before being returned.
pub(crate) FunctionBody: Vec<ast::Statement<&'input str>> = {
    "{" <stmts:Statement*> "}" => without_none(stmts)
};

// Maps the state-space keyword of a variable declaration to the matching
// ast::StateSpace variant.
StateSpaceSpecifier: ast::StateSpace = {
    ".reg" => ast::StateSpace::Reg,
    ".sreg" => ast::StateSpace::Sreg,
    ".const" => ast::StateSpace::Const,
    ".global" => ast::StateSpace::Global,
    ".local" => ast::StateSpace::Local,
    ".shared" => ast::StateSpace::Shared
};


// Type of a declared variable: either a plain scalar type or one of the
// extended scalar types, wrapped in the corresponding ast::Type variant.
Type: ast::Type = {
    ScalarType => ast::Type::Scalar(<>),
    ExtendedScalarType => ast::Type::ExtendedScalar(<>),
};

// Maps a scalar type suffix to ast::ScalarType: untyped bits (`.b*`), unsigned
// (`.u*`), signed (`.s*`) and floating point (`.f*`).
ScalarType: ast::ScalarType = {
    ".b8" => ast::ScalarType::B8,
    ".b16" => ast::ScalarType::B16,
    ".b32" => ast::ScalarType::B32,
    ".b64" => ast::ScalarType::B64,
    ".u8" => ast::ScalarType::U8,
    ".u16" => ast::ScalarType::U16,
    ".u32" => ast::ScalarType::U32,
    ".u64" => ast::ScalarType::U64,
    ".s8" => ast::ScalarType::S8,
    ".s16" => ast::ScalarType::S16,
    ".s32" => ast::ScalarType::S32,
    ".s64" => ast::ScalarType::S64,
    ".f16" => ast::ScalarType::F16,
    ".f32" => ast::ScalarType::F32,
    ".f64" => ast::ScalarType::F64,
};

// Type suffixes that are valid for variables (see Type) but are not part of
// ScalarType: packed half pairs and predicates.
ExtendedScalarType: ast::ExtendedScalarType = {
    ".f16x2" => ast::ExtendedScalarType::F16x2,
    ".pred" => ast::ExtendedScalarType::Pred,
};

// Type suffixes accepted by the `ld` and `st` rules. Matched only for syntax;
// the value is not bound by either rule. Unlike ScalarType there is no `.f16`
// entry.
BaseType = {
    ".b8", ".b16", ".b32", ".b64",
    ".u8", ".u16", ".u32", ".u64",
    ".s8", ".s16", ".s32", ".s64",
    ".f32", ".f64"
};

// One statement inside a function body: a label, a debug directive, a
// variable declaration or an optionally predicated instruction. Debug
// directives are recognised but yield None; the other forms yield Some.
Statement: Option<ast::Statement<&'input str>> = {
    Label => Some(ast::Statement::Label(<>)),
    DebugDirective => None,
    <decl:Variable> ";" => Some(ast::Statement::Variable(decl)),
    <pred:PredAt?> <inst:Instruction> ";" => Some(ast::Statement::Instruction(pred, inst))
};

// Debug directives allowed among statements. Currently only `.loc`; the rule
// carries no value.
DebugDirective: () = {
    DebugLocation
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-loc
// `.loc` followed by three numbers (see the linked documentation for their
// meaning). The numbers are matched but not recorded.
DebugLocation = {
    ".loc" Num Num Num
};

// A label definition: an identifier followed by a colon. Yields the
// identifier text without the colon.
Label: &'input str = {
    <ID> ":"
};

// A variable declaration: state space, type, then a name that may carry a
// `<N>` suffix (see VariableName). The trailing semicolon is consumed by
// Statement, not here.
Variable: ast::Variable<&'input str> = {
    <space:StateSpaceSpecifier> <v_type:Type> <decl:VariableName> =>
        ast::Variable { space: space, v_type: v_type, name: decl.0, count: decl.1 }
};

// The name part of a variable declaration, returned as (name, count).
// A plain identifier has no count. A ParametrizedID such as `name<N>` is
// split at the `<`: the text between the angle brackets is parsed as u32, and
// if that fails the error is pushed to `errors` and the count is None.
// The token regex guarantees that a `<` is present and that `>` is the last
// character, so the unwrap and the slice bounds cannot fail.
VariableName: (&'input str, Option<u32>) = {
    <name:ID> => (name, None),
    <token:ParametrizedID> => {
        let open = token.find('<').unwrap();
        let (name, suffix) = token.split_at(open);
        let count = suffix[1..suffix.len() - 1]
            .parse::<u32>()
            .map_err(|e| errors.push(e.into()))
            .ok();
        (name, count)
    }
};

// Any supported instruction. Each alternative is a rule that already yields
// an ast::Instruction, so the value is forwarded unchanged.
Instruction: ast::Instruction<&'input str> = {
    InstLd,
    InstMov,
    InstMul,
    InstAdd,
    InstSetp,
    InstNot,
    InstBra,
    InstCvt,
    InstShl,
    InstSt,
    InstRet,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
// `ld` with optional qualifier, state space, cache operator and vector
// modifiers, a mandatory type, a destination identifier and a bracketed
// source operand. The modifiers and type are matched but not recorded; only
// the destination and source reach the AST.
InstLd: ast::Instruction<&'input str> = {
    "ld" LdQualifier? LdStateSpace? LdCacheOperator? Vector? BaseType
        <dst:ID> "," "[" <src:Operand> "]" => {
        let args = ast::Arg2 { dst: dst, src: src };
        ast::Instruction::Ld(ast::LdData{}, args)
    }
};

// Qualifier that may follow `ld` (and, in this grammar, `st`). `.relaxed` and
// `.acquire` must be followed by a scope suffix. Carries no value.
LdQualifier: () = {
    ".weak",
    ".volatile",
    ".relaxed" LdScope,
    ".acquire" LdScope,
};

// Scope suffix required after `.relaxed` / `.acquire` in LdQualifier.
LdScope = {
    ".cta", ".gpu", ".sys"
};

// State-space modifiers accepted by the `ld` rule.
LdStateSpace = {
    ".const",
    ".global",
    ".local",
    ".param",
    ".shared",
};

// Cache-operator modifiers accepted by the `ld` rule.
LdCacheOperator = {
    ".ca",
    ".cg",
    ".cs",
    ".lu",
    ".cv",
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-mov
// `mov` with a type suffix and a destination/source pair. The type is matched
// but not recorded.
InstMov: ast::Instruction<&'input str> = {
    "mov" MovType <args:Arg2Mov> => ast::Instruction::Mov(ast::MovData{}, args)
};

// Type suffixes accepted by the `mov` rule.
MovType = {
    ".b16", ".b32", ".b64",
    ".u16", ".u32", ".u64",
    ".s16", ".s32", ".s64",
    ".f32", ".f64",
    ".pred"
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-mul
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-mul
// `mul` with its modifier/type suffixes followed by a destination and two
// source operands.
InstMul: ast::Instruction<&'input str> = {
    "mul" <mode:InstMulMode> <args:Arg3> => ast::Instruction::Mul(mode, args)
};

// Modifier and type combinations accepted after `mul`, one alternative per
// operand family (integer, f32, f64, f16, f16x2). The modifiers are only
// validated syntactically: every alternative yields an empty ast::MulData.
InstMulMode: ast::MulData = {
    MulIntControl? IntType => ast::MulData{},
    RoundingMode? ".ftz"? ".sat"? ".f32" => ast::MulData{},
    RoundingMode? ".f64" => ast::MulData{},
    ".rn"? ".ftz"? ".sat"? ".f16" => ast::MulData{},
    ".rn"? ".ftz"? ".sat"? ".f16x2" => ast::MulData{}
};

// Result selector for integer `mul` (`.hi`, `.lo` or `.wide`; see the PTX
// integer mul documentation). Every entry needs its leading dot: the lexer
// emits `.hi` as a single token (SetpCmpOp relies on the same token), so a
// bare `hi` here could never match `mul.hi.*`.
MulIntControl = {
    ".hi", ".lo", ".wide"
};

// Rounding-mode modifiers used by the f32/f64 forms of `mul` and `add`.
// Marked #[inline] so the alternatives are expanded at each use site
// (presumably to avoid conflicts with the rules that spell `.rn` literally --
// TODO confirm).
#[inline]
RoundingMode = {
    ".rn", ".rz", ".rm", ".rp"
};

// Integer type suffixes accepted by the integer forms of `mul` and `add`.
IntType = {
    ".u16", ".u32", ".u64",
    ".s16", ".s32", ".s64",
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
// `add` with its modifier/type suffixes followed by a destination and two
// source operands.
InstAdd: ast::Instruction<&'input str> = {
    "add" <mode:InstAddMode> <args:Arg3> => ast::Instruction::Add(mode, args)
};

// Modifier and type combinations accepted after `add`: plain integer, the
// saturating `.sat.s32` form, f32, f64, f16 and f16x2. The modifiers are only
// validated syntactically: every alternative yields an empty ast::AddData.
InstAddMode: ast::AddData = {
    IntType => ast::AddData{},
    ".sat" ".s32" => ast::AddData{},
    RoundingMode? ".ftz"? ".sat"? ".f32" => ast::AddData{},
    RoundingMode? ".f64" => ast::AddData{},
    ".rn"? ".ftz"? ".sat"? ".f16" => ast::AddData{},
    ".rn"? ".ftz"? ".sat"? ".f16x2" => ast::AddData{}
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-setp
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-comparison-instructions-setp
// TODO: support f16 setp
// `setp` in two shapes: the plain comparison (two sources) and the form with
// a boolean operator, which takes a third source operand.
InstSetp: ast::Instruction<&'input str> = {
    "setp" <mode:SetpMode> <args:Arg4> => ast::Instruction::Setp(mode, args),
    "setp" <mode:SetpBoolMode> <args:Arg5> => ast::Instruction::SetpBool(mode, args),
};

// Suffixes of the plain `setp` form: comparison operator, optional `.ftz`,
// operand type. None of them is recorded; the result is an empty
// ast::SetpData.
SetpMode: ast::SetpData = {
    SetpCmpOp ".ftz"? SetpType => ast::SetpData{}
};

// Suffixes of the `setp` form with a boolean operator: comparison operator,
// boolean operator, optional `.ftz`, operand type. None of them is recorded;
// the result is an empty ast::SetpBoolData.
SetpBoolMode: ast::SetpBoolData = {
    SetpCmpOp SetpBoolOp ".ftz"? SetpType => ast::SetpBoolData{}
};

// Comparison-operator suffixes accepted by `setp`.
SetpCmpOp = {
    ".eq", ".ne", ".lt", ".le", ".gt", ".ge", ".lo", ".ls", ".hi", ".hs",
    ".equ", ".neu", ".ltu", ".leu", ".gtu", ".geu", ".num", ".nan"
};

// Boolean-operator suffixes that select the SetpBool form of `setp`.
SetpBoolOp = {
    ".and", ".or", ".xor"
};

// Operand type suffixes accepted by `setp`.
SetpType = {
    ".b16", ".b32", ".b64",
    ".u16", ".u32", ".u64",
    ".s16", ".s32", ".s64",
    ".f32", ".f64"
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-not
// `not` with a type suffix and a destination/source pair. The type is matched
// but not recorded.
InstNot: ast::Instruction<&'input str> = {
    "not" NotType <args:Arg2> => ast::Instruction::Not(ast::NotData{}, args)
};

// Type suffixes accepted by the `not` rule.
NotType = {
    ".pred", ".b16", ".b32", ".b64"
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-at
// Predicate guard in front of an instruction: `@name` or `@!name`. The `not`
// field records whether the `!` was present.
PredAt: ast::PredAt<&'input str> = {
    "@" <negation:"!"?> <label:ID> =>
        ast::PredAt { not: negation.is_some(), label: label }
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-bra
// `bra` with an optional `.uni` suffix and a single identifier operand. The
// suffix is matched but not recorded.
InstBra: ast::Instruction<&'input str> = {
    "bra" ".uni"? <target:Arg1> => ast::Instruction::Bra(ast::BraData{}, target)
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt
// `cvt` with optional rounding, `.ftz` and `.sat` modifiers, two type
// suffixes and a destination/source pair. Modifiers and types are matched but
// not recorded.
InstCvt: ast::Instruction<&'input str> = {
    "cvt" CvtRnd? ".ftz"? ".sat"? CvtType CvtType <args:Arg2> =>
        ast::Instruction::Cvt(ast::CvtData{}, args)
};

// Rounding modifier of `cvt`: one of the `i`-suffixed modes (CvtIrnd) or one
// of the plain modes (CvtFrnd).
CvtRnd = {
    CvtIrnd,
    CvtFrnd
}

// The `i`-suffixed rounding modifiers of `cvt` (the `.irnd` family in the PTX
// cvt documentation).
CvtIrnd = {
    ".rni", ".rzi", ".rmi", ".rpi"
};

// The plain rounding modifiers of `cvt` (the `.frnd` family in the PTX cvt
// documentation).
CvtFrnd = {
    ".rn",  ".rz",  ".rm", ".rp"
};

// Type suffixes accepted by `cvt`; the rule uses this list twice in a row.
CvtType = {
    ".u8", ".u16", ".u32", ".u64",
    ".s8", ".s16", ".s32", ".s64",
    ".f16", ".f32", ".f64"
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-shl
// `shl` with a type suffix, a destination and two source operands. The type
// is matched but not recorded.
InstShl: ast::Instruction<&'input str> = {
    "shl" ShlType <args:Arg3> => ast::Instruction::Shl(ast::ShlData{}, args)
};

// Type suffixes accepted by the `shl` rule.
ShlType = {
    ".b16", ".b32", ".b64"
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-st
// `st` with optional qualifier, state space, cache operator and vector
// modifiers, a mandatory type, a bracketed destination identifier and a source
// operand. Only the destination and source reach the AST.
// NOTE(review): the qualifier list is shared with `ld`, so `.acquire` is
// accepted here and `.release` is not -- confirm against the linked st
// documentation.
InstSt: ast::Instruction<&'input str> = {
    "st" LdQualifier? StStateSpace? StCacheOperator? Vector? BaseType
        "[" <dst:ID> "]" "," <src:Operand> => {
        let args = ast::Arg2 { dst: dst, src: src };
        ast::Instruction::St(ast::StData{}, args)
    }
};

// State-space modifiers accepted by the `st` rule (the `ld` list without
// `.const`).
StStateSpace = {
    ".global",
    ".local",
    ".param",
    ".shared",
};

// Cache-operator modifiers accepted by the `st` rule.
StCacheOperator = {
    ".wb",
    ".cg",
    ".cs",
    ".wt",
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
// `ret` with an optional `.uni` suffix. Takes no operands; the suffix is not
// recorded.
InstRet: ast::Instruction<&'input str> = {
    "ret" ".uni"? => ast::Instruction::Ret(ast::RetData{})
};

// A general instruction operand: an identifier, an identifier plus a numeric
// offset, or a bare number. Offsets are parsed as i32 and bare numbers as
// i128; both results go through `unwrap_with` with the shared `errors` list.
Operand: ast::Operand<&'input str> = {
    <reg:ID> => ast::Operand::Reg(reg),
    <reg:ID> "+" <text:Num> => {
        let offset = text.parse::<i32>().unwrap_with(errors);
        ast::Operand::RegOffset(reg, offset)
    },
    // TODO: start parsing whole constants sub-language:
    //       https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#constants
    <text:Num> => {
        let value = text.parse::<i128>().unwrap_with(errors);
        ast::Operand::Imm(value)
    }
};

// Source operand of `mov`: any ordinary Operand, or a dotted pair (see
// VectorOperand) whose two halves are copied into owned strings.
MovOperand: ast::MovOperand<&'input str> = {
    Operand => ast::MovOperand::Op(<>),
    <parts:VectorOperand> => ast::MovOperand::Vec(parts.0.to_string(), parts.1.to_string())
};

// An identifier followed by a dotted member, returned as (base, member). The
// member may arrive as a separate `.` token plus identifier, or as a single
// DotID token, in which case the leading dot is sliced off.
VectorOperand: (&'input str, &'input str) = {
    <base:ID> "." <member:ID> => (base, member),
    <base:ID> <dotted:DotID> => (base, &dotted[1..]),
};

// Operand list with a single identifier, stored in the `src` field.
Arg1: ast::Arg1<&'input str> = {
    <src:ID> => ast::Arg1 { src: src }
};

// Operand list `dst, src`: a destination identifier and one source operand.
Arg2: ast::Arg2<&'input str> = {
    <dst:ID> "," <src:Operand> => ast::Arg2 { dst: dst, src: src }
};

// Operand list `dst, src` for `mov`, where the source may also be a dotted
// pair (see MovOperand).
Arg2Mov: ast::Arg2Mov<&'input str> = {
    <dst:ID> "," <src:MovOperand> => ast::Arg2Mov { dst: dst, src: src }
};

// Operand list `dst, src1, src2`: a destination identifier and two source
// operands.
Arg3: ast::Arg3<&'input str> = {
    <dst:ID> "," <src1:Operand> "," <src2:Operand> =>
        ast::Arg3 { dst: dst, src1: src1, src2: src2 }
};

// Operand list `dst1[|dst2], src1, src2`: one mandatory and one optional
// destination identifier followed by two source operands.
Arg4: ast::Arg4<&'input str> = {
    <dst1:ID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> =>
        ast::Arg4 { dst1: dst1, dst2: dst2, src1: src1, src2: src2 }
};

// TODO: pass src3 negation somewhere
// Operand list `dst1[|dst2], src1, src2, [!]src3`. The optional `!` before
// the third source is accepted but currently dropped.
Arg5: ast::Arg5<&'input str> = {
    <dst1:ID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> "," "!"? <src3:Operand> =>
        ast::Arg5 { dst1: dst1, dst2: dst2, src1: src1, src2: src2, src3: src3 }
};

// Second destination of Arg4/Arg5: a `|` followed by an identifier. Yields
// the identifier only.
OptionalDst: &'input str = {
    "|" <ID>
}

// Vector-width modifiers accepted by the `ld` and `st` rules.
Vector = {
    ".v2",
    ".v4"
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-file
// `.file` followed by a number, a quoted string and optionally two more
// comma-separated numbers. Directive maps this rule to None, so nothing is
// recorded.
File = {
    ".file" Num String ("," Num "," Num)?
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#debugging-directives-section
// `.section` followed by a dotted name and a braced list of data lines.
// Directive maps this rule to None, so the contents are not recorded.
Section = {
    ".section" DotID "{" SectionDwarfLines* "}"
};

// One data line inside a `.section` block: a bit-type followed by a
// comma-separated list of numbers, or a `.b32`/`.b64` followed by a label
// with an optional `+ number` offset. Carries no value.
SectionDwarfLines: () = {
    BitType Comma<Num>,
    ".b32" SectionLabel,
    ".b64" SectionLabel,
    ".b32" SectionLabel "+" Num,
    ".b64" SectionLabel "+" Num,
};

// A label referenced from a section data line: a plain identifier or a
// dot-prefixed one.
SectionLabel = {
    ID,
    DotID
};

// Untyped bit-size suffixes that introduce a numeric section data line.
BitType = {
    ".b8", ".b16", ".b32", ".b64"
};

// Generic comma-separated list of T, collected into a Vec in source order.
// Accepts an empty list and tolerates a trailing comma: any number of
// `T ,` pairs followed by an optional final T.
Comma<T>: Vec<T> = {
    <items:(<T> ",")*> <last:T?> => {
        let mut items = items;
        items.extend(last);
        items
    }
};

// A double-quoted string with no embedded double quotes; escape sequences are
// not interpreted. The token text includes the surrounding quotes.
String = r#""[^"]*""#;
// `major.minor` as written after `.version`: digits, one dot, digits.
VersionNumber = r"[0-9]+\.[0-9]+";
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#identifiers
// An identifier: a letter followed by any number of letters, digits, `_` or
// `$`, or one of `_`, `$`, `%` followed by at least one such character.
// Yields the matched text unchanged.
ID: &'input str = r"[a-zA-Z][a-zA-Z0-9_$]*|[_$%][a-zA-Z0-9_$]+";
// A dot immediately followed by a letter-initial identifier. Yields the
// matched text including the leading dot.
DotID: &'input str = r"\.[a-zA-Z][a-zA-Z0-9_$]*";