Re-enable all failing PTX tests (#277)

Additionally remove unused compilation paths
This commit is contained in:
Andrzej Janik
2024-10-16 03:15:48 +02:00
committed by GitHub
parent 1a63ef62b7
commit 3870a96592
138 changed files with 3047 additions and 25651 deletions

37
.devcontainer/Dockerfile Normal file
View File

@ -0,0 +1,37 @@
FROM nvidia/cuda:12.4.1-base-ubuntu22.04
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    wget \
    build-essential \
    cmake \
    ninja-build \
    python3 \
    ripgrep \
    git \
    ltrace
# Feel free to change to a newer version if you have a newer version on your host
ARG CUDA_VERSION=12-4
# Docker <-> host driver version compatibility is newer host <-> older docker.
# We don't care about a specific driver version, so pick the oldest 5XX release.
ARG CUDA_DRIVER=515
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    nvidia-utils-${CUDA_DRIVER} \
    cuda-cudart-${CUDA_VERSION}
ARG ROCM_VERSION=6.2.2
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
    wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
    gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} jammy main" > /etc/apt/sources.list.d/rocm.list && \
    echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
    DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    rocminfo \
    rocm-gdb \
    rocm-smi-lib \
    hip-runtime-amd && \
    echo '/opt/rocm/lib' > /etc/ld.so.conf.d/rocm.conf && \
    ldconfig
# Reuse ROCM_VERSION (ARG values are visible to ENV at build time) so PATH
# cannot drift out of sync with the installed ROCm release.
ENV PATH=$PATH:/opt/rocm-${ROCM_VERSION}/bin

View File

@ -0,0 +1,34 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/rust
{
    "name": "zluda",
    "build": {
        "dockerfile": "Dockerfile"
    },
    // Allow ptrace-based debugging (gdb/rocm-gdb) inside the container.
    "securityOpt": [ "seccomp=unconfined" ],
    "runArgs": [
        "--runtime=nvidia",
        "--device=/dev/kfd",
        "--device=/dev/dri",
        "--group-add=video"
    ],
    "mounts": [
        {
            "source": "${localEnv:HOME}/.cargo/",
            "target": "/root/.cargo",
            "type": "bind"
        }
    ],
    // https://containers.dev/features.
    "features": {
        "ghcr.io/devcontainers/features/rust:1": {}
    },
    // https://aka.ms/dev-containers-non-root.
    "remoteUser": "root",
    //"hostRequirements": { "gpu": "optional" }
    "customizations": {
        "vscode": {
            // Trailing comma removed: VS Code's JSONC tolerates it, but strict
            // JSON parsers reject the file.
            "extensions": [ "mhutchie.git-graph" ]
        }
    }
}

View File

@ -25,7 +25,3 @@ members = [
] ]
default-members = ["zluda_lib", "zluda_ml", "zluda_inject", "zluda_redirect"] default-members = ["zluda_lib", "zluda_ml", "zluda_inject", "zluda_redirect"]
[patch.crates-io]
rspirv = { git = 'https://github.com/vosen/rspirv', rev = '9826e59a232c4a426482cda12f88d11bfda3ff9c' }
spirv_headers = { git = 'https://github.com/vosen/rspirv', rev = '9826e59a232c4a426482cda12f88d11bfda3ff9c' }

View File

@ -1,5 +1,5 @@
use amd_comgr_sys::*; use amd_comgr_sys::*;
use std::{ffi::CStr, mem, ptr}; use std::{ffi::CStr, iter, mem, ptr};
struct Data(amd_comgr_data_t); struct Data(amd_comgr_data_t);
@ -79,6 +79,24 @@ impl ActionInfo {
unsafe { amd_comgr_action_info_set_isa_name(self.get(), full_isa.as_ptr().cast()) } unsafe { amd_comgr_action_info_set_isa_name(self.get(), full_isa.as_ptr().cast()) }
} }
/// Sets the input language comgr should assume for subsequent compile
/// actions on this action info (e.g. `AMD_COMGR_LANGUAGE_LLVM_IR`).
fn set_language(&self, language: amd_comgr_language_t) -> Result<(), amd_comgr_status_s> {
unsafe { amd_comgr_action_info_set_language(self.get(), language) }
}
/// Sets the compiler option list on this action info.
///
/// The borrowed `CStr`s outlive this call, so the raw pointers collected
/// here remain valid for the duration of the FFI call.
fn set_options<'a>(
    &self,
    options: impl Iterator<Item = &'a CStr>,
) -> Result<(), amd_comgr_status_s> {
    let raw_options: Vec<_> = options.map(CStr::as_ptr).collect();
    unsafe {
        amd_comgr_action_info_set_option_list(
            self.get(),
            raw_options.as_ptr().cast_mut(),
            raw_options.len(),
        )
    }
}
fn get(&self) -> amd_comgr_action_info_t { fn get(&self) -> amd_comgr_action_info_t {
self.0 self.0
} }
@ -90,36 +108,62 @@ impl Drop for ActionInfo {
} }
} }
pub fn compile_bitcode(gcn_arch: &CStr, buffer: &[u8]) -> Result<Vec<u8>, amd_comgr_status_s> { pub fn compile_bitcode(
gcn_arch: &CStr,
main_buffer: &[u8],
ptx_impl: &[u8],
) -> Result<Vec<u8>, amd_comgr_status_s> {
use amd_comgr_sys::*; use amd_comgr_sys::*;
let bitcode_data_set = DataSet::new()?; let bitcode_data_set = DataSet::new()?;
let bitcode_data = Data::new( let main_bitcode_data = Data::new(
amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_BC, amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_BC,
c"zluda.bc", c"zluda.bc",
buffer, main_buffer,
)?;
bitcode_data_set.add(&main_bitcode_data)?;
let stdlib_bitcode_data = Data::new(
amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_BC,
c"ptx_impl.bc",
ptx_impl,
)?;
bitcode_data_set.add(&stdlib_bitcode_data)?;
let lang_action_info = ActionInfo::new()?;
lang_action_info.set_isa_name(gcn_arch)?;
lang_action_info.set_language(amd_comgr_language_t::AMD_COMGR_LANGUAGE_LLVM_IR)?;
let with_device_libs = do_action(
&bitcode_data_set,
&lang_action_info,
amd_comgr_action_kind_t::AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC,
)?;
let linked_data_set = do_action(
&with_device_libs,
&lang_action_info,
amd_comgr_action_kind_t::AMD_COMGR_ACTION_LINK_BC_TO_BC,
)?;
let compile_action_info = ActionInfo::new()?;
compile_action_info.set_isa_name(gcn_arch)?;
compile_action_info.set_options(iter::once(c"-O3"))?;
let reloc_data_set = do_action(
&linked_data_set,
&compile_action_info,
amd_comgr_action_kind_t::AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE,
)?;
let exec_data_set = do_action(
&reloc_data_set,
&compile_action_info,
amd_comgr_action_kind_t::AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
)?; )?;
bitcode_data_set.add(&bitcode_data)?;
let reloc_data_set = DataSet::new()?;
let action_info = ActionInfo::new()?;
action_info.set_isa_name(gcn_arch)?;
unsafe {
amd_comgr_do_action(
amd_comgr_action_kind_t::AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE,
action_info.get(),
bitcode_data_set.get(),
reloc_data_set.get(),
)
}?;
let exec_data_set = DataSet::new()?;
unsafe {
amd_comgr_do_action(
amd_comgr_action_kind_t::AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
action_info.get(),
reloc_data_set.get(),
exec_data_set.get(),
)
}?;
let executable = let executable =
exec_data_set.get_data(amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_EXECUTABLE, 0)?; exec_data_set.get_data(amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_EXECUTABLE, 0)?;
executable.copy_content() executable.copy_content()
} }
/// Runs a single comgr action of `kind` over `data_set` using the settings
/// in `action`, returning the freshly created output data set.
fn do_action(
    data_set: &DataSet,
    action: &ActionInfo,
    kind: amd_comgr_action_kind_t,
) -> Result<DataSet, amd_comgr_status_s> {
    let output = DataSet::new()?;
    unsafe { amd_comgr_do_action(kind, action.get(), data_set.get(), output.get()) }?;
    Ok(output)
}

View File

@ -1,6 +1,144 @@
#include <llvm-c/Core.h> #include <llvm-c/Core.h>
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Type.h" #include "llvm/IR/Type.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// Atomic read-modify-write opcodes accepted by LLVMZludaBuildAtomicRMW.
// Mirrors llvm::AtomicRMWInst::BinOp; the order (and thus the discriminant
// values) must stay in sync with the Rust `LLVMZludaAtomicRMWBinOp` enum
// declared in the llvm_zluda crate.
typedef enum
{
LLVMZludaAtomicRMWBinOpXchg, /**< Set the new value and return the one old */
LLVMZludaAtomicRMWBinOpAdd, /**< Add a value and return the old one */
LLVMZludaAtomicRMWBinOpSub, /**< Subtract a value and return the old one */
LLVMZludaAtomicRMWBinOpAnd, /**< And a value and return the old one */
LLVMZludaAtomicRMWBinOpNand, /**< Not-And a value and return the old one */
LLVMZludaAtomicRMWBinOpOr, /**< OR a value and return the old one */
LLVMZludaAtomicRMWBinOpXor, /**< Xor a value and return the old one */
LLVMZludaAtomicRMWBinOpMax, /**< Sets the value if it's greater than the
original using a signed comparison and return
the old one */
LLVMZludaAtomicRMWBinOpMin, /**< Sets the value if it's Smaller than the
original using a signed comparison and return
the old one */
LLVMZludaAtomicRMWBinOpUMax, /**< Sets the value if it's greater than the
original using an unsigned comparison and return
the old one */
LLVMZludaAtomicRMWBinOpUMin, /**< Sets the value if it's greater than the
original using an unsigned comparison and return
the old one */
LLVMZludaAtomicRMWBinOpFAdd, /**< Add a floating point value and return the
old one */
LLVMZludaAtomicRMWBinOpFSub, /**< Subtract a floating point value and return the
old one */
LLVMZludaAtomicRMWBinOpFMax, /**< Sets the value if it's greater than the
original using an floating point comparison and
return the old one */
LLVMZludaAtomicRMWBinOpFMin, /**< Sets the value if it's smaller than the
original using an floating point comparison and
return the old one */
LLVMZludaAtomicRMWBinOpUIncWrap, /**< Increments the value, wrapping back to zero
when incremented above input value */
LLVMZludaAtomicRMWBinOpUDecWrap, /**< Decrements the value, wrapping back to
the input value when decremented below zero */
} LLVMZludaAtomicRMWBinOp;
// Translates the ZLUDA C-API atomic RMW opcode into the corresponding
// llvm::AtomicRMWInst::BinOp. The two enums are kept in lockstep, so this
// is a straight one-to-one mapping.
static llvm::AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMZludaAtomicRMWBinOp BinOp)
{
    switch (BinOp)
    {
    case LLVMZludaAtomicRMWBinOpXchg: return llvm::AtomicRMWInst::Xchg;
    case LLVMZludaAtomicRMWBinOpAdd: return llvm::AtomicRMWInst::Add;
    case LLVMZludaAtomicRMWBinOpSub: return llvm::AtomicRMWInst::Sub;
    case LLVMZludaAtomicRMWBinOpAnd: return llvm::AtomicRMWInst::And;
    case LLVMZludaAtomicRMWBinOpNand: return llvm::AtomicRMWInst::Nand;
    case LLVMZludaAtomicRMWBinOpOr: return llvm::AtomicRMWInst::Or;
    case LLVMZludaAtomicRMWBinOpXor: return llvm::AtomicRMWInst::Xor;
    case LLVMZludaAtomicRMWBinOpMax: return llvm::AtomicRMWInst::Max;
    case LLVMZludaAtomicRMWBinOpMin: return llvm::AtomicRMWInst::Min;
    case LLVMZludaAtomicRMWBinOpUMax: return llvm::AtomicRMWInst::UMax;
    case LLVMZludaAtomicRMWBinOpUMin: return llvm::AtomicRMWInst::UMin;
    case LLVMZludaAtomicRMWBinOpFAdd: return llvm::AtomicRMWInst::FAdd;
    case LLVMZludaAtomicRMWBinOpFSub: return llvm::AtomicRMWInst::FSub;
    case LLVMZludaAtomicRMWBinOpFMax: return llvm::AtomicRMWInst::FMax;
    case LLVMZludaAtomicRMWBinOpFMin: return llvm::AtomicRMWInst::FMin;
    case LLVMZludaAtomicRMWBinOpUIncWrap: return llvm::AtomicRMWInst::UIncWrap;
    case LLVMZludaAtomicRMWBinOpUDecWrap: return llvm::AtomicRMWInst::UDecWrap;
    }
    llvm_unreachable("Invalid LLVMZludaAtomicRMWBinOp value!");
}
// Translates the LLVM C-API atomic ordering into the C++ AtomicOrdering
// enum used by IRBuilder.
static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering)
{
    switch (Ordering)
    {
    case LLVMAtomicOrderingNotAtomic: return AtomicOrdering::NotAtomic;
    case LLVMAtomicOrderingUnordered: return AtomicOrdering::Unordered;
    case LLVMAtomicOrderingMonotonic: return AtomicOrdering::Monotonic;
    case LLVMAtomicOrderingAcquire: return AtomicOrdering::Acquire;
    case LLVMAtomicOrderingRelease: return AtomicOrdering::Release;
    case LLVMAtomicOrderingAcquireRelease: return AtomicOrdering::AcquireRelease;
    case LLVMAtomicOrderingSequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent;
    }
    llvm_unreachable("Invalid LLVMAtomicOrdering value!");
}
// Fast-math flag bits, backported from the LLVM 19 C API. The Rust side
// (llvm_zluda crate) defines matching LLVMZludaFastMath* constants; the bit
// positions must stay in sync with them.
typedef unsigned LLVMFastMathFlags;
enum
{
LLVMFastMathAllowReassoc = (1 << 0),
LLVMFastMathNoNaNs = (1 << 1),
LLVMFastMathNoInfs = (1 << 2),
LLVMFastMathNoSignedZeros = (1 << 3),
LLVMFastMathAllowReciprocal = (1 << 4),
LLVMFastMathAllowContract = (1 << 5),
LLVMFastMathApproxFunc = (1 << 6),
LLVMFastMathNone = 0,
LLVMFastMathAll = LLVMFastMathAllowReassoc | LLVMFastMathNoNaNs |
LLVMFastMathNoInfs | LLVMFastMathNoSignedZeros |
LLVMFastMathAllowReciprocal | LLVMFastMathAllowContract |
LLVMFastMathApproxFunc,
};
// Decodes the packed LLVMFastMathFlags bit set into llvm::FastMathFlags.
static FastMathFlags mapFromLLVMFastMathFlags(LLVMFastMathFlags FMF)
{
    auto isSet = [FMF](LLVMFastMathFlags Bit)
    { return (FMF & Bit) != 0; };
    FastMathFlags Flags;
    Flags.setAllowReassoc(isSet(LLVMFastMathAllowReassoc));
    Flags.setNoNaNs(isSet(LLVMFastMathNoNaNs));
    Flags.setNoInfs(isSet(LLVMFastMathNoInfs));
    Flags.setNoSignedZeros(isSet(LLVMFastMathNoSignedZeros));
    Flags.setAllowReciprocal(isSet(LLVMFastMathAllowReciprocal));
    Flags.setAllowContract(isSet(LLVMFastMathAllowContract));
    Flags.setApproxFunc(isSet(LLVMFastMathApproxFunc));
    return Flags;
}
LLVM_C_EXTERN_C_BEGIN LLVM_C_EXTERN_C_BEGIN
@ -10,4 +148,48 @@ LLVMValueRef LLVMZludaBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, unsigned Add
return llvm::wrap(llvm::unwrap(B)->CreateAlloca(llvm::unwrap(Ty), AddrSpace, nullptr, Name)); return llvm::wrap(llvm::unwrap(B)->CreateAlloca(llvm::unwrap(Ty), AddrSpace, nullptr, Name));
} }
// C-API wrapper over IRBuilder::CreateAtomicRMW that additionally lets the
// caller select a named synchronization scope (via getOrInsertSyncScopeID),
// which the stock LLVM C API does not expose.
LLVMValueRef LLVMZludaBuildAtomicRMW(LLVMBuilderRef B, LLVMZludaAtomicRMWBinOp op,
                                     LLVMValueRef PTR, LLVMValueRef Val,
                                     char *scope,
                                     LLVMAtomicOrdering ordering)
{
    auto builder = llvm::unwrap(B);
    SyncScope::ID ssid = builder->getContext().getOrInsertSyncScopeID(scope);
    return llvm::wrap(builder->CreateAtomicRMW(mapFromLLVMRMWBinOp(op),
                                               llvm::unwrap(PTR), llvm::unwrap(Val),
                                               llvm::MaybeAlign(),
                                               mapFromLLVMOrdering(ordering), ssid));
}
// C-API wrapper over IRBuilder::CreateAtomicCmpXchg with a named
// synchronization scope.
LLVMValueRef LLVMZludaBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr,
                                         LLVMValueRef Cmp, LLVMValueRef New,
                                         char *scope,
                                         LLVMAtomicOrdering SuccessOrdering,
                                         LLVMAtomicOrdering FailureOrdering)
{
    auto builder = llvm::unwrap(B);
    SyncScope::ID ssid = builder->getContext().getOrInsertSyncScopeID(scope);
    AtomicOrdering success = mapFromLLVMOrdering(SuccessOrdering);
    AtomicOrdering failure = mapFromLLVMOrdering(FailureOrdering);
    return wrap(builder->CreateAtomicCmpXchg(unwrap(Ptr), unwrap(Cmp), unwrap(New),
                                             MaybeAlign(), success, failure, ssid));
}
// Applies the packed LLVMFastMath* flag bits to an existing floating-point
// instruction.
void LLVMZludaSetFastMathFlags(LLVMValueRef FPMathInst, LLVMFastMathFlags FMF)
{
    Instruction *Inst = cast<Instruction>(unwrap<Value>(FPMathInst));
    Inst->setFastMathFlags(mapFromLLVMFastMathFlags(FMF));
}
void LLVMZludaBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
char *scope, const char *Name)
{
auto builder = llvm::unwrap(B);
LLVMContext &context = builder->getContext();
builder->CreateFence(mapFromLLVMOrdering(Ordering),
context.getOrInsertSyncScopeID(scope));
}
LLVM_C_EXTERN_C_END LLVM_C_EXTERN_C_END

View File

@ -1,5 +1,48 @@
#![allow(non_upper_case_globals)]
use llvm_sys::prelude::*; use llvm_sys::prelude::*;
pub use llvm_sys::*; pub use llvm_sys::*;
/// Atomic read-modify-write opcode accepted by [`LLVMZludaBuildAtomicRMW`].
///
/// Mirrors `llvm::AtomicRMWInst::BinOp`; the explicit discriminants must stay
/// in sync with the `LLVMZludaAtomicRMWBinOp` C enum in the C++ shim.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LLVMZludaAtomicRMWBinOp {
LLVMZludaAtomicRMWBinOpXchg = 0,
LLVMZludaAtomicRMWBinOpAdd = 1,
LLVMZludaAtomicRMWBinOpSub = 2,
LLVMZludaAtomicRMWBinOpAnd = 3,
LLVMZludaAtomicRMWBinOpNand = 4,
LLVMZludaAtomicRMWBinOpOr = 5,
LLVMZludaAtomicRMWBinOpXor = 6,
LLVMZludaAtomicRMWBinOpMax = 7,
LLVMZludaAtomicRMWBinOpMin = 8,
LLVMZludaAtomicRMWBinOpUMax = 9,
LLVMZludaAtomicRMWBinOpUMin = 10,
LLVMZludaAtomicRMWBinOpFAdd = 11,
LLVMZludaAtomicRMWBinOpFSub = 12,
LLVMZludaAtomicRMWBinOpFMax = 13,
LLVMZludaAtomicRMWBinOpFMin = 14,
LLVMZludaAtomicRMWBinOpUIncWrap = 15,
LLVMZludaAtomicRMWBinOpUDecWrap = 16,
}
// Backport from LLVM 19
// Fast-math flag bits; combine with `|` and pass to `LLVMZludaSetFastMathFlags`.
// Bit positions must match the LLVMFastMath* enum in the C++ shim.
pub const LLVMZludaFastMathAllowReassoc: ::std::ffi::c_uint = 1 << 0;
pub const LLVMZludaFastMathNoNaNs: ::std::ffi::c_uint = 1 << 1;
pub const LLVMZludaFastMathNoInfs: ::std::ffi::c_uint = 1 << 2;
pub const LLVMZludaFastMathNoSignedZeros: ::std::ffi::c_uint = 1 << 3;
pub const LLVMZludaFastMathAllowReciprocal: ::std::ffi::c_uint = 1 << 4;
pub const LLVMZludaFastMathAllowContract: ::std::ffi::c_uint = 1 << 5;
pub const LLVMZludaFastMathApproxFunc: ::std::ffi::c_uint = 1 << 6;
// Convenience aggregates: no flags / all flags.
pub const LLVMZludaFastMathNone: ::std::ffi::c_uint = 0;
pub const LLVMZludaFastMathAll: ::std::ffi::c_uint = LLVMZludaFastMathAllowReassoc
| LLVMZludaFastMathNoNaNs
| LLVMZludaFastMathNoInfs
| LLVMZludaFastMathNoSignedZeros
| LLVMZludaFastMathAllowReciprocal
| LLVMZludaFastMathAllowContract
| LLVMZludaFastMathApproxFunc;
pub type LLVMZludaFastMathFlags = std::ffi::c_uint;
extern "C" { extern "C" {
pub fn LLVMZludaBuildAlloca( pub fn LLVMZludaBuildAlloca(
B: LLVMBuilderRef, B: LLVMBuilderRef,
@ -7,4 +50,32 @@ extern "C" {
AddrSpace: u32, AddrSpace: u32,
Name: *const i8, Name: *const i8,
) -> LLVMValueRef; ) -> LLVMValueRef;
/// Builds an `atomicrmw` instruction with an explicit, named synchronization
/// scope (e.g. "agent") — not exposed by the stock LLVM C API.
pub fn LLVMZludaBuildAtomicRMW(
B: LLVMBuilderRef,
op: LLVMZludaAtomicRMWBinOp,
PTR: LLVMValueRef,
Val: LLVMValueRef,
scope: *const i8,
ordering: LLVMAtomicOrdering,
) -> LLVMValueRef;
/// Builds a `cmpxchg` instruction with an explicit, named synchronization
/// scope.
pub fn LLVMZludaBuildAtomicCmpXchg(
B: LLVMBuilderRef,
Ptr: LLVMValueRef,
Cmp: LLVMValueRef,
New: LLVMValueRef,
scope: *const i8,
SuccessOrdering: LLVMAtomicOrdering,
FailureOrdering: LLVMAtomicOrdering,
) -> LLVMValueRef;
/// Applies the `LLVMZludaFastMath*` flag bits to a floating-point
/// instruction.
pub fn LLVMZludaSetFastMathFlags(FPMathInst: LLVMValueRef, FMF: LLVMZludaFastMathFlags);
/// Builds a `fence` instruction with an explicit, named synchronization
/// scope.
/// NOTE(review): confirm the C++ definition returns the fence value — a
/// `void` definition on the native side would be an ABI mismatch with this
/// `-> LLVMValueRef` signature.
pub fn LLVMZludaBuildFence(
B: LLVMBuilderRef,
ordering: LLVMAtomicOrdering,
scope: *const i8,
Name: *const i8,
) -> LLVMValueRef;
} }

View File

@ -9,9 +9,6 @@ edition = "2021"
[dependencies] [dependencies]
ptx_parser = { path = "../ptx_parser" } ptx_parser = { path = "../ptx_parser" }
llvm_zluda = { path = "../llvm_zluda" } llvm_zluda = { path = "../llvm_zluda" }
regex = "1"
rspirv = "0.7"
spirv_headers = "1.5"
quick-error = "1.2" quick-error = "1.2"
thiserror = "1.0" thiserror = "1.0"
bit-vec = "0.6" bit-vec = "0.6"
@ -21,18 +18,9 @@ rustc-hash = "2.0.0"
strum = "0.26" strum = "0.26"
strum_macros = "0.26" strum_macros = "0.26"
[dependencies.lalrpop-util]
version = "0.19.12"
features = ["lexer"]
[build-dependencies.lalrpop]
version = "0.19.12"
features = ["lexer"]
[dev-dependencies] [dev-dependencies]
hip_runtime-sys = { path = "../ext/hip_runtime-sys" } hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
comgr = { path = "../comgr" } comgr = { path = "../comgr" }
spirv_tools-sys = { path = "../spirv_tools-sys" }
tempfile = "3" tempfile = "3"
paste = "1.0" paste = "1.0"
cuda-driver-sys = "0.3.0" cuda-driver-sys = "0.3.0"

View File

@ -1,5 +0,0 @@
// Build script: runs the LALRPOP parser generator at build time.
extern crate lalrpop;
// `process_root` presumably scans the crate for `.lalrpop` grammar files and
// generates the corresponding parsers — verify against the lalrpop docs.
fn main() {
lalrpop::process_root().unwrap();
}

Binary file not shown.

View File

@ -1,344 +0,0 @@
// Every time this file changes it must be rebuilt:
// ocloc -file zluda_ptx_impl.cl -64 -options "-cl-std=CL2.0 -Dcl_intel_bit_instructions -DINTEL" -out_dir . -device kbl -output_no_suffix -spv_only
// /opt/rocm/llvm/bin/clang -Wall -Wextra -Wsign-compare -Wconversion -x cl -Xclang -finclude-default-header zluda_ptx_impl.cl -cl-std=CL2.0 -c -target amdgcn-amd-amdhsa -o zluda_ptx_impl.bc -emit-llvm
// Additionally you should strip names:
// spirv-opt --strip-debug zluda_ptx_impl.spv -o zluda_ptx_impl.spv --target-env=spv1.3
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
#define FUNC(NAME) __zluda_ptx_impl__ ## NAME
// Emits a __zluda_ptx_impl__<NAME> helper implementing PTX atom.inc via a
// compare-exchange loop: increments *ptr, wrapping to 0 once the stored value
// reaches `threshold`, and returns the previous value. SPACE selects the
// OpenCL address-space qualifier of the pointer (empty for generic).
#define atomic_inc(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \
uint FUNC(NAME)(SPACE uint* ptr, uint threshold) { \
uint expected = *ptr; \
uint desired; \
do { \
desired = (expected >= threshold) ? 0 : expected + 1; \
} while (!atomic_compare_exchange_strong_explicit((volatile SPACE atomic_uint*)ptr, &expected, desired, SUCCESS, FAILURE, SCOPE)); \
return expected; \
}
// Emits a __zluda_ptx_impl__<NAME> helper implementing PTX atom.dec via a
// compare-exchange loop: decrements *ptr, wrapping to `threshold` when the
// stored value is 0 or above `threshold`, and returns the previous value.
#define atomic_dec(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \
uint FUNC(NAME)(SPACE uint* ptr, uint threshold) { \
uint expected = *ptr; \
uint desired; \
do { \
desired = (expected == 0 || expected > threshold) ? threshold : expected - 1; \
} while (!atomic_compare_exchange_strong_explicit((volatile SPACE atomic_uint*)ptr, &expected, desired, SUCCESS, FAILURE, SCOPE)); \
return expected; \
}
// Emits a __zluda_ptx_impl__<NAME> helper implementing floating-point
// atom.add via an integer compare-exchange loop: the float payload (TYPE) is
// reinterpreted through a union as INT_TYPE so it can go through
// atomic_compare_exchange on ATOMIC_TYPE. Returns the previous value.
#define atomic_add(NAME, SUCCESS, FAILURE, SCOPE, SPACE, TYPE, ATOMIC_TYPE, INT_TYPE) \
TYPE FUNC(NAME)(SPACE TYPE* ptr, TYPE value) { \
volatile SPACE ATOMIC_TYPE* atomic_ptr = (volatile SPACE ATOMIC_TYPE*)ptr; \
union { \
INT_TYPE int_view; \
TYPE float_view; \
} expected, desired; \
expected.float_view = *ptr; \
do { \
desired.float_view = expected.float_view + value; \
} while (!atomic_compare_exchange_strong_explicit(atomic_ptr, &expected.int_view, desired.int_view, SUCCESS, FAILURE, SCOPE)); \
return expected.float_view; \
}
// We are doing all this mess instead of accepting memory_order and memory_scope parameters
// because ocloc emits broken (failing spirv-dis) SPIR-V when memory_order or memory_scope is a parameter
// atom.inc
atomic_inc(atom_relaxed_cta_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, );
atomic_inc(atom_acquire_cta_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, );
atomic_inc(atom_release_cta_generic_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, );
atomic_inc(atom_acq_rel_cta_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, );
atomic_inc(atom_relaxed_gpu_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
atomic_inc(atom_acquire_gpu_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, );
atomic_inc(atom_release_gpu_generic_inc, memory_order_release, memory_order_acquire, memory_scope_device, );
atomic_inc(atom_acq_rel_gpu_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
atomic_inc(atom_relaxed_sys_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
atomic_inc(atom_acquire_sys_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, );
atomic_inc(atom_release_sys_generic_inc, memory_order_release, memory_order_acquire, memory_scope_device, );
atomic_inc(atom_acq_rel_sys_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
atomic_inc(atom_relaxed_cta_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global);
atomic_inc(atom_acquire_cta_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global);
atomic_inc(atom_release_cta_global_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, __global);
atomic_inc(atom_acq_rel_cta_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global);
atomic_inc(atom_relaxed_gpu_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
atomic_inc(atom_acquire_gpu_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
atomic_inc(atom_release_gpu_global_inc, memory_order_release, memory_order_acquire, memory_scope_device, __global);
atomic_inc(atom_acq_rel_gpu_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
atomic_inc(atom_relaxed_sys_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
atomic_inc(atom_acquire_sys_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
atomic_inc(atom_release_sys_global_inc, memory_order_release, memory_order_acquire, memory_scope_device, __global);
atomic_inc(atom_acq_rel_sys_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
atomic_inc(atom_relaxed_cta_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local);
atomic_inc(atom_acquire_cta_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local);
atomic_inc(atom_release_cta_shared_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, __local);
atomic_inc(atom_acq_rel_cta_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local);
atomic_inc(atom_relaxed_gpu_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
atomic_inc(atom_acquire_gpu_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
atomic_inc(atom_release_gpu_shared_inc, memory_order_release, memory_order_acquire, memory_scope_device, __local);
atomic_inc(atom_acq_rel_gpu_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
atomic_inc(atom_relaxed_sys_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
atomic_inc(atom_acquire_sys_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
atomic_inc(atom_release_sys_shared_inc, memory_order_release, memory_order_acquire, memory_scope_device, __local);
atomic_inc(atom_acq_rel_sys_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
// atom.dec
atomic_dec(atom_relaxed_cta_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, );
atomic_dec(atom_acquire_cta_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, );
atomic_dec(atom_release_cta_generic_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, );
atomic_dec(atom_acq_rel_cta_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, );
atomic_dec(atom_relaxed_gpu_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
atomic_dec(atom_acquire_gpu_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, );
atomic_dec(atom_release_gpu_generic_dec, memory_order_release, memory_order_acquire, memory_scope_device, );
atomic_dec(atom_acq_rel_gpu_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
atomic_dec(atom_relaxed_sys_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
atomic_dec(atom_acquire_sys_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, );
atomic_dec(atom_release_sys_generic_dec, memory_order_release, memory_order_acquire, memory_scope_device, );
atomic_dec(atom_acq_rel_sys_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
atomic_dec(atom_relaxed_cta_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global);
atomic_dec(atom_acquire_cta_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global);
atomic_dec(atom_release_cta_global_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, __global);
atomic_dec(atom_acq_rel_cta_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global);
atomic_dec(atom_relaxed_gpu_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
atomic_dec(atom_acquire_gpu_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
atomic_dec(atom_release_gpu_global_dec, memory_order_release, memory_order_acquire, memory_scope_device, __global);
atomic_dec(atom_acq_rel_gpu_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
atomic_dec(atom_relaxed_sys_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
atomic_dec(atom_acquire_sys_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
atomic_dec(atom_release_sys_global_dec, memory_order_release, memory_order_acquire, memory_scope_device, __global);
atomic_dec(atom_acq_rel_sys_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
atomic_dec(atom_relaxed_cta_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local);
atomic_dec(atom_acquire_cta_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local);
atomic_dec(atom_release_cta_shared_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, __local);
atomic_dec(atom_acq_rel_cta_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local);
atomic_dec(atom_relaxed_gpu_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
atomic_dec(atom_acquire_gpu_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
atomic_dec(atom_acq_rel_sys_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
// atom.add.f32
atomic_add(atom_relaxed_cta_generic_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, , float, atomic_uint, uint);
atomic_add(atom_acquire_cta_generic_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_work_group, , float, atomic_uint, uint);
atomic_add(atom_release_cta_generic_add_f32, memory_order_release, memory_order_acquire, memory_scope_work_group, , float, atomic_uint, uint);
atomic_add(atom_acq_rel_cta_generic_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, , float, atomic_uint, uint);
atomic_add(atom_relaxed_gpu_generic_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_acquire_gpu_generic_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_release_gpu_generic_add_f32, memory_order_release, memory_order_acquire, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_acq_rel_gpu_generic_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_relaxed_sys_generic_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_acquire_sys_generic_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_release_sys_generic_add_f32, memory_order_release, memory_order_acquire, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_acq_rel_sys_generic_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_device, , float, atomic_uint, uint);
atomic_add(atom_relaxed_cta_global_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global, float, atomic_uint, uint);
atomic_add(atom_acquire_cta_global_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global, float, atomic_uint, uint);
atomic_add(atom_release_cta_global_add_f32, memory_order_release, memory_order_acquire, memory_scope_work_group, __global, float, atomic_uint, uint);
atomic_add(atom_acq_rel_cta_global_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global, float, atomic_uint, uint);
atomic_add(atom_relaxed_gpu_global_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_acquire_gpu_global_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_release_gpu_global_add_f32, memory_order_release, memory_order_acquire, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_acq_rel_gpu_global_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_relaxed_sys_global_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_acquire_sys_global_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_release_sys_global_add_f32, memory_order_release, memory_order_acquire, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_acq_rel_sys_global_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global, float, atomic_uint, uint);
atomic_add(atom_relaxed_cta_shared_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local, float, atomic_uint, uint);
atomic_add(atom_acquire_cta_shared_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local, float, atomic_uint, uint);
atomic_add(atom_release_cta_shared_add_f32, memory_order_release, memory_order_acquire, memory_scope_work_group, __local, float, atomic_uint, uint);
atomic_add(atom_acq_rel_cta_shared_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local, float, atomic_uint, uint);
atomic_add(atom_relaxed_gpu_shared_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local, float, atomic_uint, uint);
atomic_add(atom_acquire_gpu_shared_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_device, __local, float, atomic_uint, uint);
atomic_add(atom_release_gpu_shared_add_f32, memory_order_release, memory_order_acquire, memory_scope_device, __local, float, atomic_uint, uint);
atomic_add(atom_acq_rel_gpu_shared_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local, float, atomic_uint, uint);
atomic_add(atom_relaxed_sys_shared_add_f32, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local, float, atomic_uint, uint);
atomic_add(atom_acquire_sys_shared_add_f32, memory_order_acquire, memory_order_acquire, memory_scope_device, __local, float, atomic_uint, uint);
// Instantiations of the `atomic_add` macro, one per PTX `atom.add` flavor
// (atom.<sem>.<scope>.<space>.add.<type>). The macro (defined earlier in this
// file) lowers the floating-point add to a compare-exchange loop on the
// matching integer atomic type (atomic_uint for f32, atomic_ulong for f64).
// NOTE(review): the failure order is memory_order_acquire even for release/
// acq_rel variants because OpenCL forbids release orders on CAS failure.
// NOTE(review): "sys" scope is mapped to memory_scope_device — confirm whether
// memory_scope_all_svm_devices was intended.
atomic_add(atom_release_sys_shared_add_f32, memory_order_release, memory_order_acquire, memory_scope_device, __local, float, atomic_uint, uint);
atomic_add(atom_acq_rel_sys_shared_add_f32, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local, float, atomic_uint, uint);
// atom.add.f64, generic address space
atomic_add(atom_relaxed_cta_generic_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, , double, atomic_ulong, ulong);
atomic_add(atom_acquire_cta_generic_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_work_group, , double, atomic_ulong, ulong);
atomic_add(atom_release_cta_generic_add_f64, memory_order_release, memory_order_acquire, memory_scope_work_group, , double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_cta_generic_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, , double, atomic_ulong, ulong);
atomic_add(atom_relaxed_gpu_generic_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_acquire_gpu_generic_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_release_gpu_generic_add_f64, memory_order_release, memory_order_acquire, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_gpu_generic_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_relaxed_sys_generic_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_acquire_sys_generic_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_release_sys_generic_add_f64, memory_order_release, memory_order_acquire, memory_scope_device, , double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_sys_generic_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_device, , double, atomic_ulong, ulong);
// atom.add.f64, __global address space
atomic_add(atom_relaxed_cta_global_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global, double, atomic_ulong, ulong);
atomic_add(atom_acquire_cta_global_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global, double, atomic_ulong, ulong);
atomic_add(atom_release_cta_global_add_f64, memory_order_release, memory_order_acquire, memory_scope_work_group, __global, double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_cta_global_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global, double, atomic_ulong, ulong);
atomic_add(atom_relaxed_gpu_global_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_acquire_gpu_global_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_release_gpu_global_add_f64, memory_order_release, memory_order_acquire, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_gpu_global_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_relaxed_sys_global_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_acquire_sys_global_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_release_sys_global_add_f64, memory_order_release, memory_order_acquire, memory_scope_device, __global, double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_sys_global_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global, double, atomic_ulong, ulong);
// atom.add.f64, __local (shared) address space
atomic_add(atom_relaxed_cta_shared_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local, double, atomic_ulong, ulong);
atomic_add(atom_acquire_cta_shared_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local, double, atomic_ulong, ulong);
atomic_add(atom_release_cta_shared_add_f64, memory_order_release, memory_order_acquire, memory_scope_work_group, __local, double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_cta_shared_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local, double, atomic_ulong, ulong);
atomic_add(atom_relaxed_gpu_shared_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_acquire_gpu_shared_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_release_gpu_shared_add_f64, memory_order_release, memory_order_acquire, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_gpu_shared_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_relaxed_sys_shared_add_f64, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_acquire_sys_shared_add_f64, memory_order_acquire, memory_order_acquire, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_release_sys_shared_add_f64, memory_order_release, memory_order_acquire, memory_scope_device, __local, double, atomic_ulong, ulong);
atomic_add(atom_acq_rel_sys_shared_add_f64, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local, double, atomic_ulong, ulong);
#ifdef INTEL
// Intel path: PTX bfe/bfi/brev map directly onto the intel_* built-ins.
uint FUNC(bfe_u32)(uint base, uint pos, uint len) {
    return intel_ubfe(base, pos, len);
}
ulong FUNC(bfe_u64)(ulong base, uint pos, uint len) {
    return intel_ubfe(base, pos, len);
}
int FUNC(bfe_s32)(int base, uint pos, uint len) {
    return intel_sbfe(base, pos, len);
}
long FUNC(bfe_s64)(long base, uint pos, uint len) {
    return intel_sbfe(base, pos, len);
}
uint FUNC(bfi_b32)(uint insert, uint base, uint offset, uint count) {
    return intel_bfi(base, insert, offset, count);
}
ulong FUNC(bfi_b64)(ulong insert, ulong base, uint offset, uint count) {
    return intel_bfi(base, insert, offset, count);
}
uint FUNC(brev_b32)(uint base) {
    return intel_bfrev(base);
}
ulong FUNC(brev_b64)(ulong base) {
    return intel_bfrev(base);
}
#else
// AMD path: 32-bit unsigned extract uses the amd_bfe media-ops built-in.
uint FUNC(bfe_u32)(uint base, uint pos, uint len) {
    return amd_bfe(base, pos, len);
}
// PTX `bfe.u64`: extract a `len`-bit field starting at bit `pos` from `base`.
// Previous code computed `(base >> pos) & len`, i.e. it used `len` itself as
// the mask instead of a mask of `len` one-bits, and shifted by pos >= 64
// (undefined behavior) unchecked.
ulong FUNC(bfe_u64)(ulong base, uint pos, uint len) {
    // Field starts above the top bit: nothing to extract.
    if (pos >= 64U) {
        return 0UL;
    }
    // Full-width (or wider) field is just a shift; also avoids `1UL << 64`.
    if (len >= 64U) {
        return base >> pos;
    }
    return (base >> pos) & ((1UL << len) - 1UL);
}
// PTX `bfe.s32`: signed (sign-extending) bit-field extract via amd_bfe.
int FUNC(bfe_s32)(int base, uint pos, uint len) {
    return amd_bfe(base, pos, len);
}
// PTX `bfe.s64`: extract a `len`-bit field starting at bit `pos` from `base`
// and sign-extend it from its top bit. Previous code computed
// `(base >> pos) & len`, which neither masks the field correctly nor
// sign-extends it.
long FUNC(bfe_s64)(long base, uint pos, uint len) {
    // Zero-width field extracts nothing.
    if (len == 0U) {
        return 0L;
    }
    // Field starts above the top bit: result is the replicated sign bit.
    if (pos >= 64U) {
        return base >> 63;
    }
    // Clamp the field so it does not run past bit 63.
    if (len >= 64U || pos + len > 64U) {
        len = 64U - pos;
    }
    // Shift the field flush to the top, then arithmetic-shift back down so
    // the field's own top bit becomes the sign.
    return (base << (64U - pos - len)) >> (64U - len);
}
// PTX `bfi.b32`: insert the low `count` bits of `insert` into `base` at bit
// `offset`.
uint FUNC(bfi_b32)(uint insert, uint base, uint offset, uint count) {
    // amd_bfm builds a mask of `count` one-bits starting at bit `offset`.
    uint mask = amd_bfm(count, offset);
    return (~mask & base) | (mask & insert);
}
ulong FUNC(bfi_b64)(ulong insert, ulong base, uint offset, uint count) {
    // No 64-bit bfm built-in, so the mask is built by hand. offset/count are
    // taken modulo 64 here — TODO(review): confirm that matches ptxas for
    // operand values >= 64.
    ulong mask = ((1UL << (count & 0x3f)) - 1UL) << (offset & 0x3f);
    return (~mask & base) | (mask & insert);
}
// PTX `brev`: bit reversal, lowered straight to the LLVM intrinsic.
extern __attribute__((const)) uint __llvm_bitreverse_i32(uint) __asm("llvm.bitreverse.i32");
uint FUNC(brev_b32)(uint base) {
    return __llvm_bitreverse_i32(base);
}
extern __attribute__((const)) ulong __llvm_bitreverse_i64(ulong) __asm("llvm.bitreverse.i64");
ulong FUNC(brev_b64)(ulong base) {
    return __llvm_bitreverse_i64(base);
}
// Taken from __ballot definition in hipamd/include/hip/amd_detail/amd_device_functions.h
uint FUNC(activemask)() {
    return (uint)__builtin_amdgcn_uicmp(1, 0, 33);
}
// PTX special registers, mapped onto the OpenCL work-item query functions:
// %tid -> local id, %ntid -> local size, %ctaid -> group id,
// %nctaid -> number of groups.
uint FUNC(sreg_tid)(uchar dim) {
    return (uint)get_local_id(dim);
}
uint FUNC(sreg_ntid)(uchar dim) {
    return (uint)get_local_size(dim);
}
uint FUNC(sreg_ctaid)(uchar dim) {
    return (uint)get_group_id(dim);
}
uint FUNC(sreg_nctaid)(uchar dim) {
    return (uint)get_num_groups(dim);
}
// %clock: truncated to 32 bits as per the PTX register width.
uint FUNC(sreg_clock)() {
    return (uint)__builtin_amdgcn_s_memtime();
}
// Taken from __ballot definition in hipamd/include/hip/amd_detail/amd_device_functions.h
// They return active threads, which I think is incorrect
extern __attribute__((const)) uint __ockl_lane_u32();
uint FUNC(sreg_lanemask_lt)() {
    // All bits strictly below the current lane's bit, truncated to 32 bits.
    uint lane_idx = __ockl_lane_u32();
    ulong mask = (1UL << lane_idx) - 1UL;
    return (uint)mask;
}
#endif
// Stub for CUDA's `__assertfail`: device-side assertions are silently
// swallowed on this backend.
void FUNC(__assertfail)(
    __attribute__((unused)) __private ulong* message,
    __attribute__((unused)) __private ulong* file,
    __attribute__((unused)) __private uint* line,
    __attribute__((unused)) __private ulong* function,
    __attribute__((unused)) __private ulong* charSize
) {
}
// Stub for CUDA's `vprintf`: no formatting is performed; always reports zero
// characters written.
uint FUNC(vprintf)(
    __attribute__((unused)) __generic void* format,
    __attribute__((unused)) __generic void* valist
) {
    return 0;
}

151
ptx/lib/zluda_ptx_impl.cpp Normal file
View File

@ -0,0 +1,151 @@
// Every time this file changes it must be rebuilt, you need llvm-17:
// /opt/rocm/llvm/bin/clang -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && llvm-dis-17 zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | llvm-as-17 - -o zluda_ptx_impl.bc && llvm-dis-17 zluda_ptx_impl.bc
#include <cstddef>
#include <cstdint>
#define FUNC(NAME) __device__ __attribute__((retain)) __zluda_ptx_impl_##NAME
extern "C"
{
    // PTX %activemask: 32-bit mask of the lanes currently executing. Reads
    // the low half of EXEC, which is the whole mask in wave32 mode (the file
    // is built with -mno-wavefrontsize64, see the build command above).
    uint32_t FUNC(activemask)()
    {
        return __builtin_amdgcn_read_exec_lo();
    }
    size_t __ockl_get_local_id(uint32_t) __device__;
    // PTX %tid: the thread's index within its CTA (workgroup-local id).
    uint32_t FUNC(sreg_tid)(uint8_t member)
    {
        return (uint32_t)__ockl_get_local_id(member);
    }
    size_t __ockl_get_local_size(uint32_t) __device__;
    // PTX %ntid: the CTA's size (workgroup size) in the given dimension.
    uint32_t FUNC(sreg_ntid)(uint8_t member)
    {
        return (uint32_t)__ockl_get_local_size(member);
    }
size_t __ockl_get_global_id(uint32_t) __device__;
uint32_t FUNC(sreg_ctaid)(uint8_t member)
{
return (uint32_t)__ockl_get_global_id(member);
}
size_t __ockl_get_global_size(uint32_t) __device__;
uint32_t FUNC(sreg_nctaid)(uint8_t member)
{
return (uint32_t)__ockl_get_global_size(member);
}
    uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __attribute__((device));
    // PTX `bfe.u32`: extract a `len`-bit field starting at bit `pos`.
    // The 32-bit form only honors the low 8 bits of the pos/len operands.
    uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
    {
        uint32_t pos = pos_32 & 0xFFU;
        uint32_t len = len_32 & 0xFFU;
        // Field starts above the top bit: nothing to extract.
        if (pos >= 32)
            return 0;
        // V_BFE_U32 only uses bits [4:0] for len (max value is 31)
        if (len >= 32)
            return base >> pos;
        // Defensive clamp; len < 32 holds here after the early return above.
        len = std::min(len, 31U);
        return __ockl_bfe_u32(base, pos, len);
    }
    // LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64,
    // but using it only leads to LLVM crashes on RDNA2
    // PTX `bfe.u64`: extract a `len`-bit field starting at bit `pos`.
    uint64_t FUNC(bfe_u64)(uint64_t base, uint32_t pos, uint32_t len)
    {
        // NVIDIA docs are incorrect. In 64 bit `bfe` both `pos` and `len`
        // parameters use whole 32 bit number and not just bottom 8 bits
        if (pos >= 64)
            return 0;
        if (len >= 64)
            return base >> pos;
        // len <= 63 here, so the mask shift below is well-defined.
        len = std::min(len, 63U);
        return (base >> pos) & ((1UL << len) - 1UL);
    }
    int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __attribute__((device));
    // PTX `bfe.s32`: sign-extending bit-field extract; only the low 8 bits of
    // the pos/len operands are honored.
    int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
    {
        uint32_t pos = pos_32 & 0xFFU;
        uint32_t len = len_32 & 0xFFU;
        // Zero-width field extracts nothing.
        if (len == 0)
            return 0;
        // Field starts above the top bit: replicate the sign bit.
        if (pos >= 32)
            return (base >> 31);
        // V_BFE_I32 only uses bits [4:0] for len (max value is 31)
        if (len >= 32)
            return base >> pos;
        // Defensive clamp; len < 32 holds here after the early return above.
        len = std::min(len, 31U);
        return __ockl_bfe_i32(base, pos, len);
    }
static __device__ uint32_t add_sat(uint32_t x, uint32_t y)
{
uint32_t result;
if (__builtin_add_overflow(x, y, &result))
{
return UINT32_MAX;
}
else
{
return result;
}
}
static __device__ uint32_t sub_sat(uint32_t x, uint32_t y)
{
uint32_t result;
if (__builtin_sub_overflow(x, y, &result))
{
return 0;
}
else
{
return result;
}
}
    // PTX `bfe.s64`: extract a `len`-bit field at bit `pos` and sign-extend
    // it from the field's own top bit.
    int64_t FUNC(bfe_s64)(int64_t base, uint32_t pos, uint32_t len)
    {
        // NVIDIA docs are incorrect. In 64 bit `bfe` both `pos` and `len`
        // parameters use whole 32 bit number and not just bottom 8 bits
        if (len == 0)
            return 0;
        if (pos >= 64)
            return (base >> 63U);
        // Saturating arithmetic keeps pos + len from wrapping before the
        // comparison; clamp the field so it ends at bit 63.
        if (add_sat(pos, len) >= 64)
            len = sub_sat(64, pos);
        // Shift the field flush to the top, then arithmetic-shift back down
        // to sign-extend.
        return (base << (64U - pos - len)) >> (64U - len);
    }
    uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __attribute__((device));
    // PTX `bfi.b32`: insert the low `len` bits of `insert` into `base` at bit
    // `pos`; only the low 8 bits of the pos/len operands are honored.
    uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
    {
        uint32_t pos = pos_32 & 0xFFU;
        uint32_t len = len_32 & 0xFFU;
        // Field starts above the top bit: base is unchanged.
        if (pos >= 32)
            return base;
        uint32_t mask;
        if (len >= 32)
            mask = UINT32_MAX << pos;
        else
            // __ockl_bfm_u32 builds `len` one-bits starting at bit `pos`.
            mask = __ockl_bfm_u32(len, pos);
        return (~mask & base) | (mask & (insert << pos));
    }
    // PTX `bfi.b64`: insert the low `len` bits of `insert` into `base` at bit
    // `pos`.
    uint64_t FUNC(bfi_b64)(uint64_t insert, uint64_t base, uint32_t pos, uint32_t len)
    {
        // NVIDIA docs are incorrect. In 64 bit `bfe` both `pos` and `len`
        // parameters use whole 32 bit number and not just bottom 8 bits
        if (pos >= 64)
            return base;
        uint64_t mask;
        if (len >= 64)
            mask = UINT64_MAX << pos;
        else
            // len <= 63 here, so the mask shift is well-defined.
            mask = ((1UL << len) - 1UL) << (pos);
        return (~mask & base) | (mask & (insert << pos));
    }
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -1,186 +1,6 @@
#[cfg(test)]
extern crate paste;
#[macro_use]
extern crate lalrpop_util;
#[macro_use]
extern crate quick_error;
extern crate bit_vec;
extern crate half;
#[cfg(test)]
extern crate hip_runtime_sys as hip;
extern crate rspirv;
extern crate spirv_headers as spirv;
#[cfg(test)]
extern crate spirv_tools_sys as spirv_tools;
#[macro_use]
extern crate bitflags;
lalrpop_mod!(
#[allow(warnings)]
ptx
);
pub mod ast;
pub(crate) mod pass; pub(crate) mod pass;
#[cfg(test)] #[cfg(test)]
mod test; mod test;
mod translate;
use std::fmt; pub use pass::to_llvm_module;
pub use crate::ptx::ModuleParser;
use ast::PtxError;
pub use lalrpop_util::lexer::Token;
pub use lalrpop_util::ParseError;
pub use rspirv::dr::Error as SpirvError;
pub use translate::to_spirv_module;
pub use translate::KernelInfo;
pub use translate::TranslateError;
/// Convenience parsing entry points layered over the lalrpop-generated
/// [`ModuleParser`].
pub trait ModuleParserExt {
    /// Parses `txt` into a PTX module, returning every error (recovered and
    /// fatal alike) if anything went wrong.
    fn parse_checked<'input>(
        txt: &'input str,
    ) -> Result<ast::Module<'input>, Vec<ParseError<usize, Token<'input>, ast::PtxError>>>;
    // Returned AST might be malformed. Some users, like logger, want to look at
    // malformed AST to record information - list of kernels or such
    fn parse_unchecked<'input>(
        txt: &'input str,
    ) -> (
        ast::Module<'input>,
        Vec<ParseError<usize, Token<'input>, ast::PtxError>>,
    );
}
impl ModuleParserExt for ModuleParser {
    /// Parses strictly: any recovered or fatal error turns the whole parse
    /// into `Err` carrying every error collected.
    fn parse_checked<'input>(
        txt: &'input str,
    ) -> Result<ast::Module<'input>, Vec<ParseError<usize, Token<'input>, ast::PtxError>>> {
        let mut collected_errors = Vec::new();
        let parse_result = ptx::ModuleParser::new().parse(&mut collected_errors, txt);
        match parse_result {
            Err(fatal_error) => {
                collected_errors.push(fatal_error);
                Err(collected_errors)
            }
            Ok(ast) if collected_errors.is_empty() => Ok(ast),
            Ok(_) => Err(collected_errors),
        }
    }

    /// Parses leniently: always yields an AST (possibly empty on a fatal
    /// error) together with whatever errors were encountered.
    fn parse_unchecked<'input>(
        txt: &'input str,
    ) -> (
        ast::Module<'input>,
        Vec<ParseError<usize, Token<'input>, ast::PtxError>>,
    ) {
        let mut collected_errors = Vec::new();
        let parse_result = ptx::ModuleParser::new().parse(&mut collected_errors, txt);
        let ast = parse_result.unwrap_or_else(|fatal_error| {
            collected_errors.push(fatal_error);
            // Fall back to an empty module so callers always get an AST.
            ast::Module {
                version: (0, 0),
                directives: Vec::new(),
            }
        });
        (ast, collected_errors)
    }
}
/// Pairs a [`ParseError`] with the source text it was produced from so the
/// error can be rendered together with the offending snippet.
pub struct DisplayParseError<'a, Loc, Tok, Err>(&'a str, &'a ParseError<Loc, Tok, Err>);
impl<'a, Loc: fmt::Display + Into<usize> + Copy, Tok, Err> DisplayParseError<'a, Loc, Tok, Err> {
    // unsafe because there's no guarantee that the input str is the one that this error was created from
    pub unsafe fn new(error: &'a ParseError<Loc, Tok, Err>, text: &'a str) -> Self {
        Self(text, error)
    }
}
impl<'a, Loc, Tok> fmt::Display for DisplayParseError<'a, Loc, Tok, PtxError>
where
    Loc: fmt::Display,
    Tok: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Render the two recoverable "unrecognized" errors with the offending
        // source snippet; everything else falls back to lalrpop's Display.
        match self.1 {
            ParseError::User {
                error: PtxError::UnrecognizedStatement { start, end },
            } => self.fmt_unrecognized(f, *start, *end, "statement"),
            ParseError::User {
                error: PtxError::UnrecognizedDirective { start, end },
            } => self.fmt_unrecognized(f, *start, *end, "directive"),
            _ => self.1.fmt(f),
        }
    }
}
impl<'a, Loc, Tok, Err> DisplayParseError<'a, Loc, Tok, Err> {
    /// Renders an "unrecognized statement/directive" error together with the
    /// source range it covers.
    fn fmt_unrecognized(
        &self,
        f: &mut fmt::Formatter,
        start: usize,
        end: usize,
        kind: &'static str,
    ) -> fmt::Result {
        // SAFETY: relies on `start..end` being valid indices into `self.0`;
        // that is exactly the contract `DisplayParseError::new` is `unsafe`
        // for — the error must come from this very source string.
        let full_substring = unsafe { self.0.get_unchecked(start..end) };
        write!(
            f,
            "Unrecognized {} `{}` found at {}:{}",
            kind, full_substring, start, end
        )
    }
}
/// Drops the `None`s from `x` and unwraps the remaining `Some`s, preserving
/// order. `flatten()` is the idiomatic spelling of `filter_map(|x| x)` on an
/// iterator of `Option`s.
pub(crate) fn without_none<T>(x: Vec<Option<T>>) -> Vec<T> {
    x.into_iter().flatten().collect()
}
/// Maps a vector-element suffix (`x`/`y`/`z`/`w` or the color aliases
/// `r`/`g`/`b`/`a`) to its 0-based component index.
pub(crate) fn vector_index<'input>(
    inp: &'input str,
) -> Result<u8, ParseError<usize, lalrpop_util::lexer::Token<'input>, ast::PtxError>> {
    let index = match inp {
        "x" | "r" => 0,
        "y" | "g" => 1,
        "z" | "b" => 2,
        "w" | "a" => 3,
        // Anything else is not a valid component name.
        _ => {
            return Err(ParseError::User {
                error: ast::PtxError::WrongVectorElement,
            })
        }
    };
    Ok(index)
}
#[cfg(test)]
mod tests {
    use crate::{DisplayParseError, ModuleParser, ModuleParserExt};

    // Checks that an unknown instruction is reported as a single recoverable
    // error and that the rendered message contains the offending token.
    #[test]
    fn error_report_unknown_instructions() {
        let module = r#"
            .version 6.5
            .target sm_30
            .address_size 64
            .visible .entry add(
                .param .u64 input,
            )
            {
                .reg .u64 	    x;
                does_not_exist.u64 	    x, x;
                ret;
            }"#;
        let errors = match ModuleParser::parse_checked(module) {
            Err(e) => e,
            Ok(_) => panic!(),
        };
        assert_eq!(errors.len(), 1);
        // Tuple-struct construction is fine here: same crate, fields visible.
        let reporter = DisplayParseError(module, &errors[0]);
        let build_log_string = format!("{}", reporter);
        assert!(build_log_string.contains("does_not_exist"));
    }
}

View File

@ -1,299 +0,0 @@
use std::collections::{BTreeMap, BTreeSet};
use super::*;
/*
PTX represents dynamically allocated shared local memory as
.extern .shared .b32 shared_mem[];
In SPIRV/OpenCL world this is expressed as an additional argument to the kernel
And in AMD compilation
This pass looks for all uses of .extern .shared and converts them to
an additional method argument
The question is how this artificial argument should be expressed. There are
several options:
* Straight conversion:
.shared .b32 shared_mem[]
* Introduce .param_shared statespace:
.param_shared .b32 shared_mem
or
.param_shared .b32 shared_mem[]
* Introduce .shared_ptr <SCALAR> type:
.param .shared_ptr .b32 shared_mem
* Reuse .ptr hint:
.param .u64 .ptr shared_mem
This is the most tempting, but also the most nonsensical, .ptr is just a
hint, which has no semantical meaning (and the output of our
transformation has a semantical meaning - we emit additional
"OpFunctionParameter ..." with type "OpTypePointer Workgroup ...")
*/
/// Rewrites uses of module-level `.shared` variables into extra method
/// arguments (see the pass-level comment above for the rationale).
///
/// Three phases: (1) collect the `.shared` globals, (2) record which methods
/// reference them directly, (3) resolve indirect uses through the call graph
/// and inject the corresponding arguments into every affected method.
pub(super) fn run<'input>(
    module: Vec<Directive<'input>>,
    kernels_methods_call_map: &MethodsCallMap<'input>,
    new_id: &mut impl FnMut() -> SpirvWord,
) -> Result<Vec<Directive<'input>>, TranslateError> {
    // Phase 1: every module-level `.shared` variable, keyed by name.
    let mut globals_shared = HashMap::new();
    for dir in module.iter() {
        match dir {
            Directive::Variable(
                _,
                ast::Variable {
                    state_space: ast::StateSpace::Shared,
                    name,
                    v_type,
                    ..
                },
            ) => {
                globals_shared.insert(*name, v_type.clone());
            }
            _ => {}
        }
    }
    // Nothing to rewrite when the module has no shared globals.
    if globals_shared.len() == 0 {
        return Ok(module);
    }
    // Phase 2: per method, the set of shared globals its body references
    // directly. `visit_map` with an identity mapping is used purely to walk
    // every id in every statement.
    let mut methods_to_directly_used_shared_globals = HashMap::<_, HashSet<SpirvWord>>::new();
    let module = module
        .into_iter()
        .map(|directive| match directive {
            Directive::Method(Function {
                func_decl,
                globals,
                body: Some(statements),
                import_as,
                tuning,
                linkage,
            }) => {
                let call_key = (*func_decl).borrow().name;
                let statements = statements
                    .into_iter()
                    .map(|statement| {
                        statement.visit_map(
                            &mut |id, _: Option<(&ast::Type, ast::StateSpace)>, _, _| {
                                if let Some(_) = globals_shared.get(&id) {
                                    methods_to_directly_used_shared_globals
                                        .entry(call_key)
                                        .or_insert_with(HashSet::new)
                                        .insert(id);
                                }
                                Ok::<_, TranslateError>(id)
                            },
                        )
                    })
                    .collect::<Result<Vec<_>, _>>()?;
                Ok::<_, TranslateError>(Directive::Method(Function {
                    func_decl,
                    globals,
                    body: Some(statements),
                    import_as,
                    tuning,
                    linkage,
                }))
            }
            directive => Ok(directive),
        })
        .collect::<Result<Vec<_>, _>>()?;
    // If there's a chain `kernel` -> `fn1` -> `fn2`, where only `fn2` uses extern shared,
    // make sure it gets propagated to `fn1` and `kernel`
    let methods_to_indirectly_used_shared_globals = resolve_indirect_uses_of_globals_shared(
        methods_to_directly_used_shared_globals,
        kernels_methods_call_map,
    );
    // now visit every method declaration and inject those additional arguments
    let mut directives = Vec::with_capacity(module.len());
    for directive in module.into_iter() {
        match directive {
            Directive::Method(Function {
                func_decl,
                globals,
                body: Some(statements),
                import_as,
                tuning,
                linkage,
            }) => {
                let statements = {
                    let func_decl_ref = &mut (*func_decl).borrow_mut();
                    let method_name = func_decl_ref.name;
                    insert_arguments_remap_statements(
                        new_id,
                        kernels_methods_call_map,
                        &globals_shared,
                        &methods_to_indirectly_used_shared_globals,
                        method_name,
                        &mut directives,
                        func_decl_ref,
                        statements,
                    )?
                };
                directives.push(Directive::Method(Function {
                    func_decl,
                    globals,
                    body: Some(statements),
                    import_as,
                    tuning,
                    linkage,
                }));
            }
            directive => directives.push(directive),
        }
    }
    Ok(directives)
}
// We need to compute two kinds of information:
// * If it's a kernel -> size of .shared globals in use (direct or indirect)
// * If it's a function -> does it use .shared global (directly or indirectly)
/// For every method, unions the shared globals it uses directly with those
/// used by its direct callees (the call map is assumed to already be
/// transitively flattened). `BTreeSet` keeps the per-method order stable.
fn resolve_indirect_uses_of_globals_shared<'input>(
    methods_use_of_globals_shared: HashMap<ast::MethodName<'input, SpirvWord>, HashSet<SpirvWord>>,
    kernels_methods_call_map: &MethodsCallMap<'input>,
) -> HashMap<ast::MethodName<'input, SpirvWord>, BTreeSet<SpirvWord>> {
    let mut resolved = HashMap::new();
    for (method, callees) in kernels_methods_call_map.methods() {
        // Seed with the globals the method touches directly (if any).
        let mut all_globals = methods_use_of_globals_shared
            .get(&method)
            .into_iter()
            .flatten()
            .copied()
            .collect::<BTreeSet<_>>();
        // Fold in everything reachable through each callee.
        for &callee in callees {
            if let Some(callee_globals) =
                methods_use_of_globals_shared.get(&ast::MethodName::Func(callee))
            {
                all_globals.extend(callee_globals.iter().copied());
            }
        }
        resolved.insert(method, all_globals);
    }
    resolved
}
/// For a single method, computes the shared-global -> argument remapping and
/// rewrites the body accordingly.
///
/// Functions receive fresh ids and get a new `.shared` input argument per
/// global; kernels keep the original ids (an identity remap), since the
/// globals stay directly visible to them. Methods that use no shared globals
/// are returned unchanged.
fn insert_arguments_remap_statements<'input>(
    new_id: &mut impl FnMut() -> SpirvWord,
    kernels_methods_call_map: &MethodsCallMap<'input>,
    globals_shared: &HashMap<SpirvWord, ast::Type>,
    methods_to_indirectly_used_shared_globals: &HashMap<
        ast::MethodName<'input, SpirvWord>,
        BTreeSet<SpirvWord>,
    >,
    method_name: ast::MethodName<SpirvWord>,
    result: &mut Vec<Directive>,
    func_decl_ref: &mut std::cell::RefMut<ast::MethodDeclaration<SpirvWord>>,
    statements: Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
) -> Result<Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
    let remapped_globals_in_method =
        if let Some(method_globals) = methods_to_indirectly_used_shared_globals.get(&method_name) {
            match method_name {
                ast::MethodName::Func(..) => {
                    // Functions: allocate a fresh id per shared global ...
                    let remapped_globals = method_globals
                        .iter()
                        .map(|global| {
                            (
                                *global,
                                (
                                    new_id(),
                                    globals_shared
                                        .get(&global)
                                        .unwrap_or_else(|| todo!())
                                        .clone(),
                                ),
                            )
                        })
                        .collect::<BTreeMap<_, _>>();
                    // ... and surface each one as an extra `.shared` input
                    // argument on the function declaration.
                    for (_, (new_shared_global_id, shared_global_type)) in remapped_globals.iter() {
                        func_decl_ref.input_arguments.push(ast::Variable {
                            align: None,
                            v_type: shared_global_type.clone(),
                            state_space: ast::StateSpace::Shared,
                            name: *new_shared_global_id,
                            array_init: Vec::new(),
                        });
                    }
                    remapped_globals
                }
                // Kernels: identity remap — the globals remain in scope.
                ast::MethodName::Kernel(..) => method_globals
                    .iter()
                    .map(|global| {
                        (
                            *global,
                            (
                                *global,
                                globals_shared
                                    .get(&global)
                                    .unwrap_or_else(|| todo!())
                                    .clone(),
                            ),
                        )
                    })
                    .collect::<BTreeMap<_, _>>(),
            }
        } else {
            // Method uses no shared globals: body is unchanged.
            return Ok(statements);
        };
    replace_uses_of_shared_memory(
        new_id,
        methods_to_indirectly_used_shared_globals,
        statements,
        remapped_globals_in_method,
    )
}
/// Rewrites a method body against a shared-global remap table: call sites to
/// functions that (transitively) use shared globals get the remapped ids
/// appended as extra arguments, and every other reference to a remapped
/// global is substituted with its new id.
fn replace_uses_of_shared_memory<'input>(
    new_id: &mut impl FnMut() -> SpirvWord,
    methods_to_indirectly_used_shared_globals: &HashMap<
        ast::MethodName<'input, SpirvWord>,
        BTreeSet<SpirvWord>,
    >,
    statements: Vec<ExpandedStatement>,
    remapped_globals_in_method: BTreeMap<SpirvWord, (SpirvWord, ast::Type)>,
) -> Result<Vec<ExpandedStatement>, TranslateError> {
    let mut result = Vec::with_capacity(statements.len());
    for statement in statements {
        match statement {
            Statement::Instruction(ast::Instruction::Call {
                mut data,
                mut arguments,
            }) => {
                // We can safely skip checking call arguments,
                // because there's simply no way to pass shared ptr
                // without converting it to .b64 first
                if let Some(shared_globals_used_by_callee) =
                    methods_to_indirectly_used_shared_globals
                        .get(&ast::MethodName::Func(arguments.func))
                {
                    // Append one (type, id) pair per shared global the callee
                    // needs; BTreeSet iteration keeps the order deterministic.
                    for &shared_global_used_by_callee in shared_globals_used_by_callee {
                        let (remapped_shared_id, type_) = remapped_globals_in_method
                            .get(&shared_global_used_by_callee)
                            .unwrap_or_else(|| todo!());
                        data.input_arguments
                            .push((type_.clone(), ast::StateSpace::Shared));
                        arguments.input_arguments.push(*remapped_shared_id);
                    }
                }
                result.push(Statement::Instruction(ast::Instruction::Call {
                    data,
                    arguments,
                }))
            }
            statement => {
                // Non-call statements: substitute remapped ids in place.
                let new_statement =
                    statement.visit_map(&mut |id,
                                              _: Option<(&ast::Type, ast::StateSpace)>,
                                              _,
                                              _| {
                        Ok::<_, TranslateError>(
                            if let Some((remapped_shared_id, _)) =
                                remapped_globals_in_method.get(&id)
                            {
                                *remapped_shared_id
                            } else {
                                id
                            },
                        )
                    })?;
                result.push(new_statement);
            }
        }
    }
    Ok(result)
}

View File

@ -1,524 +0,0 @@
use super::*;
use ptx_parser as ast;
use std::{
collections::{BTreeSet, HashSet},
iter,
rc::Rc,
};
/*
Our goal here is to transform
.visible .entry foobar(.param .u64 input) {
.reg .b64 in_addr;
.reg .b64 in_addr2;
ld.param.u64 in_addr, [input];
cvta.to.global.u64 in_addr2, in_addr;
}
into:
.visible .entry foobar(.param .u8 input[]) {
.reg .u8 in_addr[];
.reg .u8 in_addr2[];
ld.param.u8[] in_addr, [input];
mov.u8[] in_addr2, in_addr;
}
or:
.visible .entry foobar(.reg .u8 input[]) {
.reg .u8 in_addr[];
.reg .u8 in_addr2[];
mov.u8[] in_addr, input;
mov.u8[] in_addr2, in_addr;
}
or:
.visible .entry foobar(.param ptr<u8, global> input) {
.reg ptr<u8, global> in_addr;
.reg ptr<u8, global> in_addr2;
ld.param.ptr<u8, global> in_addr, [input];
mov.ptr<u8, global> in_addr2, in_addr;
}
*/
// TODO: detect more patterns (mov, call via reg, call via param)
// TODO: don't convert to ptr if the register is not ultimately used for ld/st
// TODO: once insert_mem_ssa_statements is moved to later, move this pass after
// argument expansion
// TODO: propagate out of calls and into calls
/// Converts 64-bit kernel arguments that are provably used as global
/// pointers (via `cvta.to.global`) into `.ptr`-style `u8` global pointers,
/// rewriting dependent registers and pointer arithmetic along the way.
/// Kernels only; non-kernel methods are returned untouched.
pub(super) fn run<'a, 'input>(
    func_args: Rc<RefCell<ast::MethodDeclaration<'input, SpirvWord>>>,
    func_body: Vec<TypedStatement>,
    id_defs: &mut NumericIdResolver<'a>,
) -> Result<
    (
        Rc<RefCell<ast::MethodDeclaration<'input, SpirvWord>>>,
        Vec<TypedStatement>,
    ),
    TranslateError,
> {
    let mut method_decl = func_args.borrow_mut();
    if !matches!(method_decl.name, ast::MethodName::Kernel(..)) {
        drop(method_decl);
        return Ok((func_args, func_body));
    }
    // The declaration is mutated below; a shared Rc would make that unsound.
    if Rc::strong_count(&func_args) != 1 {
        return Err(error_unreachable());
    }
    // All 64-bit integer kernel arguments — candidates for pointer conversion.
    let func_args_64bit = (*method_decl)
        .input_arguments
        .iter()
        .filter_map(|arg| match arg.v_type {
            ast::Type::Scalar(ast::ScalarType::U64)
            | ast::Type::Scalar(ast::ScalarType::B64)
            | ast::Type::Scalar(ast::ScalarType::S64) => Some(arg.name),
            _ => None,
        })
        .collect::<HashSet<_>>();
    // Scan pass: `stateful_markers` records cvta.to.global (dst, src) register
    // pairs; `stateful_init_reg` records registers loaded straight from a
    // 64-bit kernel argument via ld.param.
    let mut stateful_markers = Vec::new();
    let mut stateful_init_reg = HashMap::<_, Vec<_>>::new();
    for statement in func_body.iter() {
        match statement {
            Statement::Instruction(ast::Instruction::Cvta {
                data:
                    ast::CvtaDetails {
                        state_space: ast::StateSpace::Global,
                        direction: ast::CvtaDirection::GenericToExplicit,
                    },
                arguments,
            }) => {
                if let (TypedOperand::Reg(dst), Some(src)) =
                    (arguments.dst, arguments.src.underlying_register())
                {
                    if is_64_bit_integer(id_defs, src) && is_64_bit_integer(id_defs, dst) {
                        stateful_markers.push((dst, src));
                    }
                }
            }
            Statement::Instruction(ast::Instruction::Ld {
                data:
                    ast::LdDetails {
                        state_space: ast::StateSpace::Param,
                        typ: ast::Type::Scalar(ast::ScalarType::U64),
                        ..
                    },
                arguments,
            })
            | Statement::Instruction(ast::Instruction::Ld {
                data:
                    ast::LdDetails {
                        state_space: ast::StateSpace::Param,
                        typ: ast::Type::Scalar(ast::ScalarType::S64),
                        ..
                    },
                arguments,
            })
            | Statement::Instruction(ast::Instruction::Ld {
                data:
                    ast::LdDetails {
                        state_space: ast::StateSpace::Param,
                        typ: ast::Type::Scalar(ast::ScalarType::B64),
                        ..
                    },
                arguments,
            }) => {
                if let (TypedOperand::Reg(dst), Some(src)) =
                    (arguments.dst, arguments.src.underlying_register())
                {
                    if func_args_64bit.contains(&src) {
                        multi_hash_map_append(&mut stateful_init_reg, dst, src);
                    }
                }
            }
            _ => {}
        }
    }
    if stateful_markers.len() == 0 {
        drop(method_decl);
        return Ok((func_args, func_body));
    }
    // Arguments and registers that take part in the conversion: an argument
    // qualifies when an ld.param of it later feeds a cvta.to.global.
    let mut func_args_ptr = HashSet::new();
    let mut regs_ptr_current = HashSet::new();
    for (dst, src) in stateful_markers {
        if let Some(func_args) = stateful_init_reg.get(&src) {
            for a in func_args {
                func_args_ptr.insert(*a);
                regs_ptr_current.insert(src);
                regs_ptr_current.insert(dst);
            }
        }
    }
    // BTreeSet here to have a stable order of iteration,
    // unfortunately our tests rely on it
    // Fixed-point propagation: any register produced by 64-bit add/sub from a
    // pointer register is itself a pointer register.
    let mut regs_ptr_seen = BTreeSet::new();
    while regs_ptr_current.len() > 0 {
        let mut regs_ptr_new = HashSet::new();
        for statement in func_body.iter() {
            match statement {
                Statement::Instruction(ast::Instruction::Add {
                    data:
                        ast::ArithDetails::Integer(ast::ArithInteger {
                            type_: ast::ScalarType::U64,
                            saturate: false,
                        }),
                    arguments,
                })
                | Statement::Instruction(ast::Instruction::Add {
                    data:
                        ast::ArithDetails::Integer(ast::ArithInteger {
                            type_: ast::ScalarType::S64,
                            saturate: false,
                        }),
                    arguments,
                }) => {
                    // TODO: don't mark result of double pointer sub or double
                    // pointer add as ptr result
                    if let (TypedOperand::Reg(dst), Some(src1)) =
                        (arguments.dst, arguments.src1.underlying_register())
                    {
                        if regs_ptr_current.contains(&src1) && !regs_ptr_seen.contains(&src1) {
                            regs_ptr_new.insert(dst);
                        }
                    } else if let (TypedOperand::Reg(dst), Some(src2)) =
                        (arguments.dst, arguments.src2.underlying_register())
                    {
                        if regs_ptr_current.contains(&src2) && !regs_ptr_seen.contains(&src2) {
                            regs_ptr_new.insert(dst);
                        }
                    }
                }
                Statement::Instruction(ast::Instruction::Sub {
                    data:
                        ast::ArithDetails::Integer(ast::ArithInteger {
                            type_: ast::ScalarType::U64,
                            saturate: false,
                        }),
                    arguments,
                })
                | Statement::Instruction(ast::Instruction::Sub {
                    data:
                        ast::ArithDetails::Integer(ast::ArithInteger {
                            type_: ast::ScalarType::S64,
                            saturate: false,
                        }),
                    arguments,
                }) => {
                    // TODO: don't mark result of double pointer sub or double
                    // pointer add as ptr result
                    if let (TypedOperand::Reg(dst), Some(src1)) =
                        (arguments.dst, arguments.src1.underlying_register())
                    {
                        if regs_ptr_current.contains(&src1) && !regs_ptr_seen.contains(&src1) {
                            regs_ptr_new.insert(dst);
                        }
                    } else if let (TypedOperand::Reg(dst), Some(src2)) =
                        (arguments.dst, arguments.src2.underlying_register())
                    {
                        if regs_ptr_current.contains(&src2) && !regs_ptr_seen.contains(&src2) {
                            regs_ptr_new.insert(dst);
                        }
                    }
                }
                _ => {}
            }
        }
        for id in regs_ptr_current {
            regs_ptr_seen.insert(id);
        }
        regs_ptr_current = regs_ptr_new;
    }
    drop(regs_ptr_current);
    // Emit a `u8 global` pointer variable per converted register and record
    // the old-id -> new-id mapping.
    let mut remapped_ids = HashMap::new();
    let mut result = Vec::with_capacity(regs_ptr_seen.len() + func_body.len());
    for reg in regs_ptr_seen {
        let new_id = id_defs.register_variable(
            ast::Type::Pointer(ast::ScalarType::U8, ast::StateSpace::Global),
            ast::StateSpace::Reg,
        );
        result.push(Statement::Variable(ast::Variable {
            align: None,
            name: new_id,
            array_init: Vec::new(),
            v_type: ast::Type::Pointer(ast::ScalarType::U8, ast::StateSpace::Global),
            state_space: ast::StateSpace::Reg,
        }));
        remapped_ids.insert(reg, new_id);
    }
    // Retype the converted kernel arguments as `u8 global` pointers.
    for arg in (*method_decl).input_arguments.iter_mut() {
        if !func_args_ptr.contains(&arg.name) {
            continue;
        }
        let new_id = id_defs.register_variable(
            ast::Type::Pointer(ast::ScalarType::U8, ast::StateSpace::Global),
            ast::StateSpace::Param,
        );
        let old_name = arg.name;
        arg.v_type = ast::Type::Pointer(ast::ScalarType::U8, ast::StateSpace::Global);
        arg.name = new_id;
        remapped_ids.insert(old_name, new_id);
    }
    // Rewrite pass: pointer add/sub become PtrAccess, declarations of
    // remapped registers are dropped (replaced above), everything else has
    // its operands remapped/post-processed.
    for statement in func_body {
        match statement {
            l @ Statement::Label(_) => result.push(l),
            c @ Statement::Conditional(_) => result.push(c),
            c @ Statement::Constant(..) => result.push(c),
            Statement::Variable(var) => {
                if !remapped_ids.contains_key(&var.name) {
                    result.push(Statement::Variable(var));
                }
            }
            Statement::Instruction(ast::Instruction::Add {
                data:
                    ast::ArithDetails::Integer(ast::ArithInteger {
                        type_: ast::ScalarType::U64,
                        saturate: false,
                    }),
                arguments,
            })
            | Statement::Instruction(ast::Instruction::Add {
                data:
                    ast::ArithDetails::Integer(ast::ArithInteger {
                        type_: ast::ScalarType::S64,
                        saturate: false,
                    }),
                arguments,
            }) if is_add_ptr_direct(&remapped_ids, &arguments) => {
                // ptr + offset (either operand order) -> PtrAccess.
                let (ptr, offset) = match arguments.src1.underlying_register() {
                    Some(src1) if remapped_ids.contains_key(&src1) => {
                        (remapped_ids.get(&src1).unwrap(), arguments.src2)
                    }
                    Some(src2) if remapped_ids.contains_key(&src2) => {
                        (remapped_ids.get(&src2).unwrap(), arguments.src1)
                    }
                    _ => return Err(error_unreachable()),
                };
                let dst = arguments.dst.unwrap_reg()?;
                result.push(Statement::PtrAccess(PtrAccess {
                    underlying_type: ast::Type::Scalar(ast::ScalarType::U8),
                    state_space: ast::StateSpace::Global,
                    dst: *remapped_ids.get(&dst).unwrap(),
                    ptr_src: *ptr,
                    offset_src: offset,
                }))
            }
            Statement::Instruction(ast::Instruction::Sub {
                data:
                    ast::ArithDetails::Integer(ast::ArithInteger {
                        type_: ast::ScalarType::U64,
                        saturate: false,
                    }),
                arguments,
            })
            | Statement::Instruction(ast::Instruction::Sub {
                data:
                    ast::ArithDetails::Integer(ast::ArithInteger {
                        type_: ast::ScalarType::S64,
                        saturate: false,
                    }),
                arguments,
            }) if is_sub_ptr_direct(&remapped_ids, &arguments) => {
                // ptr - offset: negate the offset, then emit PtrAccess.
                let (ptr, offset) = match arguments.src1.underlying_register() {
                    Some(ref src1) => (remapped_ids.get(src1).unwrap(), arguments.src2),
                    _ => return Err(error_unreachable()),
                };
                let offset_neg = id_defs.register_intermediate(Some((
                    ast::Type::Scalar(ast::ScalarType::S64),
                    ast::StateSpace::Reg,
                )));
                result.push(Statement::Instruction(ast::Instruction::Neg {
                    data: ast::TypeFtz {
                        type_: ast::ScalarType::S64,
                        flush_to_zero: None,
                    },
                    arguments: ast::NegArgs {
                        src: offset,
                        dst: TypedOperand::Reg(offset_neg),
                    },
                }));
                let dst = arguments.dst.unwrap_reg()?;
                result.push(Statement::PtrAccess(PtrAccess {
                    underlying_type: ast::Type::Scalar(ast::ScalarType::U8),
                    state_space: ast::StateSpace::Global,
                    dst: *remapped_ids.get(&dst).unwrap(),
                    ptr_src: *ptr,
                    offset_src: TypedOperand::Reg(offset_neg),
                }))
            }
            inst @ Statement::Instruction(_) => {
                let mut post_statements = Vec::new();
                let new_statement = inst.visit_map(&mut FnVisitor::new(
                    |operand, type_space, is_dst, relaxed_conversion| {
                        convert_to_stateful_memory_access_postprocess(
                            id_defs,
                            &remapped_ids,
                            &mut result,
                            &mut post_statements,
                            operand,
                            type_space,
                            is_dst,
                            relaxed_conversion,
                        )
                    },
                ))?;
                result.push(new_statement);
                result.extend(post_statements);
            }
            repack @ Statement::RepackVector(_) => {
                let mut post_statements = Vec::new();
                let new_statement = repack.visit_map(&mut FnVisitor::new(
                    |operand, type_space, is_dst, relaxed_conversion| {
                        convert_to_stateful_memory_access_postprocess(
                            id_defs,
                            &remapped_ids,
                            &mut result,
                            &mut post_statements,
                            operand,
                            type_space,
                            is_dst,
                            relaxed_conversion,
                        )
                    },
                ))?;
                result.push(new_statement);
                result.extend(post_statements);
            }
            _ => return Err(error_unreachable()),
        }
    }
    drop(method_decl);
    Ok((func_args, result))
}
/// Returns true when `id` resolves to a 64-bit integer scalar (u64, s64 or
/// b64). A failed type lookup counts as "not a 64-bit integer".
fn is_64_bit_integer(id_defs: &NumericIdResolver, id: SpirvWord) -> bool {
    matches!(
        id_defs.get_typed(id),
        Ok((
            ast::Type::Scalar(ast::ScalarType::U64 | ast::ScalarType::S64 | ast::ScalarType::B64),
            _,
            _
        ))
    )
}
/// Decides whether this `add` can be rewritten as a direct pointer access.
/// True only when the destination is a remapped register and exactly one of
/// the two sources is a remapped pointer register; adding two pointers, or a
/// pointer to a non-register operand, does not qualify.
fn is_add_ptr_direct(
    remapped_ids: &HashMap<SpirvWord, SpirvWord>,
    arg: &ast::AddArgs<TypedOperand>,
) -> bool {
    // Destination must be a plain register that has been remapped.
    let dst = match arg.dst {
        TypedOperand::Reg(dst) => dst,
        TypedOperand::Imm(..) | TypedOperand::RegOffset(..) | TypedOperand::VecMember(..) => {
            return false
        }
    };
    if !remapped_ids.contains_key(&dst) {
        return false;
    }
    let src1_is_remapped_ptr = arg
        .src1
        .underlying_register()
        .map_or(false, |reg| remapped_ids.contains_key(&reg));
    match arg.src2.underlying_register() {
        // don't trigger optimization when adding two pointers
        Some(src2_reg) if src1_is_remapped_ptr => !remapped_ids.contains_key(&src2_reg),
        Some(src2_reg) => remapped_ids.contains_key(&src2_reg),
        None => false,
    }
}
/// Decides whether this `sub` can be rewritten as a direct pointer access:
/// the destination must be a remapped register, src1 must be a remapped
/// pointer register and src2 must not be one (pointer - pointer stays as-is).
fn is_sub_ptr_direct(
    remapped_ids: &HashMap<SpirvWord, SpirvWord>,
    arg: &ast::SubArgs<TypedOperand>,
) -> bool {
    // Destination must be a plain register that has been remapped.
    let dst = match arg.dst {
        TypedOperand::Reg(dst) => dst,
        TypedOperand::Imm(..) | TypedOperand::RegOffset(..) | TypedOperand::VecMember(..) => {
            return false
        }
    };
    if !remapped_ids.contains_key(&dst) {
        return false;
    }
    // src1 must be a remapped pointer register.
    let src1_is_remapped_ptr = arg
        .src1
        .underlying_register()
        .map_or(false, |reg| remapped_ids.contains_key(&reg));
    if !src1_is_remapped_ptr {
        return false;
    }
    // don't trigger optimization when subtracting two pointers
    arg.src2
        .underlying_register()
        .map_or(true, |reg| !remapped_ids.contains_key(&reg))
}
/// Remaps an operand of a typed statement after the stateful-memory
/// conversion: if the operand's register was replaced (present in
/// `remapped_ids`), substitute the new id, inserting an implicit conversion
/// when the new register's type/space does not match what the instruction
/// expects at this operand position.
fn convert_to_stateful_memory_access_postprocess(
    id_defs: &mut NumericIdResolver,
    remapped_ids: &HashMap<SpirvWord, SpirvWord>,
    result: &mut Vec<TypedStatement>,
    post_statements: &mut Vec<TypedStatement>,
    operand: TypedOperand,
    type_space: Option<(&ast::Type, ast::StateSpace)>,
    is_dst: bool,
    relaxed_conversion: bool,
) -> Result<TypedOperand, TranslateError> {
    operand.map(|operand, _| {
        Ok(match remapped_ids.get(&operand) {
            Some(new_id) => {
                let (new_operand_type, new_operand_space, _) = id_defs.get_typed(*new_id)?;
                // TODO: readd if required
                if let Some((expected_type, expected_space)) = type_space {
                    // If the remapped register already satisfies the expected
                    // type/space under the applicable conversion rules, use it
                    // directly with no extra conversion statement.
                    let implicit_conversion = if relaxed_conversion {
                        if is_dst {
                            super::insert_implicit_conversions::should_convert_relaxed_dst_wrapper
                        } else {
                            super::insert_implicit_conversions::should_convert_relaxed_src_wrapper
                        }
                    } else {
                        super::insert_implicit_conversions::default_implicit_conversion
                    };
                    if implicit_conversion(
                        (new_operand_space, &new_operand_type),
                        (expected_space, expected_type),
                    )
                    .is_ok()
                    {
                        return Ok(*new_id);
                    }
                }
                // Otherwise bridge through a temporary carrying the operand's
                // original type/space and convert between it and the new id.
                let (old_operand_type, old_operand_space, _) = id_defs.get_typed(operand)?;
                let converting_id = id_defs
                    .register_intermediate(Some((old_operand_type.clone(), old_operand_space)));
                let kind = if new_operand_space == ast::StateSpace::Reg {
                    ConversionKind::Default
                } else {
                    ConversionKind::PtrToPtr
                };
                if is_dst {
                    // Destination: the instruction writes the temporary, then a
                    // post-statement converts it into the remapped register.
                    post_statements.push(Statement::Conversion(ImplicitConversion {
                        src: converting_id,
                        dst: *new_id,
                        from_type: old_operand_type,
                        from_space: old_operand_space,
                        to_type: new_operand_type,
                        to_space: new_operand_space,
                        kind,
                    }));
                    converting_id
                } else {
                    // Source: convert the remapped register into the temporary
                    // before the instruction consumes it.
                    result.push(Statement::Conversion(ImplicitConversion {
                        src: *new_id,
                        dst: converting_id,
                        from_type: new_operand_type,
                        from_space: new_operand_space,
                        to_type: old_operand_type,
                        to_space: old_operand_space,
                        kind,
                    }));
                    converting_id
                }
            }
            None => operand,
        })
    })
}

View File

@ -1,138 +0,0 @@
use super::*;
use ptx_parser as ast;
/// Converts unconditional (parsed) statements into typed statements.
/// Special-cases two instruction forms: `mov` of a function symbol (lowered
/// to a `FunctionPointer` statement) and `call` (resolved against the
/// callee's signature); every other instruction is visited so that vector
/// operands get repacked.
pub(crate) fn run(
    func: Vec<UnconditionalStatement>,
    fn_defs: &GlobalFnDeclResolver,
    id_defs: &mut NumericIdResolver,
) -> Result<Vec<TypedStatement>, TranslateError> {
    let mut result = Vec::<TypedStatement>::with_capacity(func.len());
    for s in func {
        match s {
            Statement::Instruction(inst) => match inst {
                // `mov` whose source names a known function is taking a
                // function pointer, not moving data.
                ast::Instruction::Mov {
                    data,
                    arguments:
                        ast::MovArgs {
                            dst: ast::ParsedOperand::Reg(dst_reg),
                            src: ast::ParsedOperand::Reg(src_reg),
                        },
                } if fn_defs.fns.contains_key(&src_reg) => {
                    // Function pointers are only representable as .u64 moves.
                    if data.typ != ast::Type::Scalar(ast::ScalarType::U64) {
                        return Err(error_mismatched_type());
                    }
                    result.push(TypedStatement::FunctionPointer(FunctionPointerDetails {
                        dst: dst_reg,
                        src: src_reg,
                    }));
                }
                ast::Instruction::Call { data, arguments } => {
                    // Resolve operand types from the callee's declaration
                    // before visiting for vector repacking.
                    let resolver = fn_defs.get_fn_sig_resolver(arguments.func)?;
                    let resolved_call = resolver.resolve_in_spirv_repr(data, arguments)?;
                    let mut visitor = VectorRepackVisitor::new(&mut result, id_defs);
                    let reresolved_call =
                        Statement::Instruction(ast::visit_map(resolved_call, &mut visitor)?);
                    visitor.func.push(reresolved_call);
                    visitor.func.extend(visitor.post_stmts);
                }
                inst => {
                    let mut visitor = VectorRepackVisitor::new(&mut result, id_defs);
                    let instruction = Statement::Instruction(ast::visit_map(inst, &mut visitor)?);
                    visitor.func.push(instruction);
                    visitor.func.extend(visitor.post_stmts);
                }
            },
            Statement::Label(i) => result.push(Statement::Label(i)),
            Statement::Variable(v) => result.push(Statement::Variable(v)),
            Statement::Conditional(c) => result.push(Statement::Conditional(c)),
            // No other statement kinds exist at this stage of the pipeline.
            _ => return Err(error_unreachable()),
        }
    }
    Ok(result)
}
/// Visitor that rewrites vector-pack operands (`{a,b}`) into `RepackVector`
/// statements emitted before (for sources) or after (for destinations) the
/// instruction being visited.
struct VectorRepackVisitor<'a, 'b> {
    // Output statement stream; pre-instruction repacks are pushed here.
    func: &'b mut Vec<TypedStatement>,
    id_def: &'b mut NumericIdResolver<'a>,
    // At most one pending post-instruction repack (destination operand).
    post_stmts: Option<TypedStatement>,
}
impl<'a, 'b> VectorRepackVisitor<'a, 'b> {
    fn new(func: &'b mut Vec<TypedStatement>, id_def: &'b mut NumericIdResolver<'a>) -> Self {
        VectorRepackVisitor {
            func,
            id_def,
            post_stmts: None,
        }
    }

    /// Replaces a `{a,b,...}` vector-pack operand with a fresh vector-typed
    /// register plus a `RepackVector` statement. For destinations the repack
    /// is an extract that must run after the instruction; for sources it is a
    /// pack that runs before it.
    fn convert_vector(
        &mut self,
        is_dst: bool,
        relaxed_type_check: bool,
        typ: &ast::Type,
        state_space: ast::StateSpace,
        idx: Vec<SpirvWord>,
    ) -> Result<SpirvWord, TranslateError> {
        // mov.u32 foobar, {a,b};
        let scalar_t = match typ {
            ast::Type::Vector(_, scalar_t) => *scalar_t,
            _ => return Err(error_mismatched_type()),
        };
        let temp_vec = self
            .id_def
            .register_intermediate(Some((typ.clone(), state_space)));
        let statement = Statement::RepackVector(RepackVectorDetails {
            is_extract: is_dst,
            typ: scalar_t,
            packed: temp_vec,
            unpacked: idx,
            relaxed_type_check,
        });
        if is_dst {
            self.post_stmts = Some(statement);
        } else {
            self.func.push(statement);
        }
        Ok(temp_vec)
    }
}
impl<'a, 'b> ast::VisitorMap<ast::ParsedOperand<SpirvWord>, TypedOperand, TranslateError>
for VectorRepackVisitor<'a, 'b>
{
fn visit_ident(
&mut self,
ident: SpirvWord,
_: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
_: bool,
_: bool,
) -> Result<SpirvWord, TranslateError> {
Ok(ident)
}
fn visit(
&mut self,
op: ast::ParsedOperand<SpirvWord>,
type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
is_dst: bool,
relaxed_type_check: bool,
) -> Result<TypedOperand, TranslateError> {
Ok(match op {
ast::ParsedOperand::Reg(reg) => TypedOperand::Reg(reg),
ast::ParsedOperand::RegOffset(reg, offset) => TypedOperand::RegOffset(reg, offset),
ast::ParsedOperand::Imm(x) => TypedOperand::Imm(x),
ast::ParsedOperand::VecMember(vec, idx) => TypedOperand::VecMember(vec, idx),
ast::ParsedOperand::VecPack(vec) => {
let (type_, space) = type_space.ok_or_else(|| error_mismatched_type())?;
TypedOperand::Reg(self.convert_vector(
is_dst,
relaxed_type_check,
type_,
space,
vec,
)?)
}
})
}
}

View File

@ -1,5 +1,3 @@
use std::collections::BTreeMap;
use super::*; use super::*;
pub(super) fn run<'a, 'input>( pub(super) fn run<'a, 'input>(
@ -26,75 +24,73 @@ fn run_method<'input>(
resolver: &mut GlobalStringIdentResolver2, resolver: &mut GlobalStringIdentResolver2,
mut method: Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>, mut method: Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>,
) -> Result<Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> { ) -> Result<Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
if method.func_decl.name.is_kernel() {
return Ok(method);
}
let is_declaration = method.body.is_none(); let is_declaration = method.body.is_none();
let mut body = Vec::new(); let mut body = Vec::new();
let mut remap_returns = Vec::new(); let mut remap_returns = Vec::new();
for arg in method.func_decl.return_arguments.iter_mut() { if !method.func_decl.name.is_kernel() {
match arg.state_space { for arg in method.func_decl.return_arguments.iter_mut() {
ptx_parser::StateSpace::Param => { match arg.state_space {
arg.state_space = ptx_parser::StateSpace::Reg; ptx_parser::StateSpace::Param => {
let old_name = arg.name; arg.state_space = ptx_parser::StateSpace::Reg;
arg.name = resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space))); let old_name = arg.name;
if is_declaration { arg.name =
continue; resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space)));
if is_declaration {
continue;
}
remap_returns.push((old_name, arg.name, arg.v_type.clone()));
body.push(Statement::Variable(ast::Variable {
align: None,
name: old_name,
v_type: arg.v_type.clone(),
state_space: ptx_parser::StateSpace::Param,
array_init: Vec::new(),
}));
} }
remap_returns.push((old_name, arg.name, arg.v_type.clone())); ptx_parser::StateSpace::Reg => {}
body.push(Statement::Variable(ast::Variable { _ => return Err(error_unreachable()),
align: None,
name: old_name,
v_type: arg.v_type.clone(),
state_space: ptx_parser::StateSpace::Param,
array_init: Vec::new(),
}));
} }
ptx_parser::StateSpace::Reg => {}
_ => return Err(error_unreachable()),
} }
} for arg in method.func_decl.input_arguments.iter_mut() {
for arg in method.func_decl.input_arguments.iter_mut() { match arg.state_space {
match arg.state_space { ptx_parser::StateSpace::Param => {
ptx_parser::StateSpace::Param => { arg.state_space = ptx_parser::StateSpace::Reg;
arg.state_space = ptx_parser::StateSpace::Reg; let old_name = arg.name;
let old_name = arg.name; arg.name =
arg.name = resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space))); resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space)));
if is_declaration { if is_declaration {
continue; continue;
}
body.push(Statement::Variable(ast::Variable {
align: None,
name: old_name,
v_type: arg.v_type.clone(),
state_space: ptx_parser::StateSpace::Param,
array_init: Vec::new(),
}));
body.push(Statement::Instruction(ast::Instruction::St {
data: ast::StData {
qualifier: ast::LdStQualifier::Weak,
state_space: ast::StateSpace::Param,
caching: ast::StCacheOperator::Writethrough,
typ: arg.v_type.clone(),
},
arguments: ast::StArgs {
src1: old_name,
src2: arg.name,
},
}));
} }
body.push(Statement::Variable(ast::Variable { ptx_parser::StateSpace::Reg => {}
align: None, _ => return Err(error_unreachable()),
name: old_name,
v_type: arg.v_type.clone(),
state_space: ptx_parser::StateSpace::Param,
array_init: Vec::new(),
}));
body.push(Statement::Instruction(ast::Instruction::St {
data: ast::StData {
qualifier: ast::LdStQualifier::Weak,
state_space: ast::StateSpace::Param,
caching: ast::StCacheOperator::Writethrough,
typ: arg.v_type.clone(),
},
arguments: ast::StArgs {
src1: old_name,
src2: arg.name,
},
}));
} }
ptx_parser::StateSpace::Reg => {}
_ => return Err(error_unreachable()),
} }
} }
if remap_returns.is_empty() {
return Ok(method);
}
let body = method let body = method
.body .body
.map(|statements| { .map(|statements| {
for statement in statements { for statement in statements {
run_statement(&remap_returns, &mut body, statement)?; run_statement(resolver, &remap_returns, &mut body, statement)?;
} }
Ok::<_, TranslateError>(body) Ok::<_, TranslateError>(body)
}) })
@ -110,28 +106,89 @@ fn run_method<'input>(
} }
fn run_statement<'input>( fn run_statement<'input>(
resolver: &mut GlobalStringIdentResolver2<'input>,
remap_returns: &Vec<(SpirvWord, SpirvWord, ast::Type)>, remap_returns: &Vec<(SpirvWord, SpirvWord, ast::Type)>,
result: &mut Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>, result: &mut Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
statement: Statement<ast::Instruction<SpirvWord>, SpirvWord>, statement: Statement<ast::Instruction<SpirvWord>, SpirvWord>,
) -> Result<(), TranslateError> { ) -> Result<(), TranslateError> {
match statement { match statement {
Statement::Instruction(ast::Instruction::Ret { .. }) => { Statement::Instruction(ast::Instruction::Call {
for (old_name, new_name, type_) in remap_returns.iter().cloned() { mut data,
mut arguments,
}) => {
let mut post_st = Vec::new();
for ((type_, space), ident) in data
.input_arguments
.iter_mut()
.zip(arguments.input_arguments.iter_mut())
{
if *space == ptx_parser::StateSpace::Param {
*space = ptx_parser::StateSpace::Reg;
let old_name = *ident;
*ident = resolver
.register_unnamed(Some((type_.clone(), ptx_parser::StateSpace::Reg)));
result.push(Statement::Instruction(ast::Instruction::Ld {
data: ast::LdDetails {
qualifier: ast::LdStQualifier::Weak,
state_space: ast::StateSpace::Param,
caching: ast::LdCacheOperator::Cached,
typ: type_.clone(),
non_coherent: false,
},
arguments: ast::LdArgs {
dst: *ident,
src: old_name,
},
}));
}
}
for ((type_, space), ident) in data
.return_arguments
.iter_mut()
.zip(arguments.return_arguments.iter_mut())
{
if *space == ptx_parser::StateSpace::Param {
*space = ptx_parser::StateSpace::Reg;
let old_name = *ident;
*ident = resolver
.register_unnamed(Some((type_.clone(), ptx_parser::StateSpace::Reg)));
post_st.push(Statement::Instruction(ast::Instruction::St {
data: ast::StData {
qualifier: ast::LdStQualifier::Weak,
state_space: ast::StateSpace::Param,
caching: ast::StCacheOperator::Writethrough,
typ: type_.clone(),
},
arguments: ast::StArgs {
src1: old_name,
src2: *ident,
},
}));
}
}
result.push(Statement::Instruction(ast::Instruction::Call {
data,
arguments,
}));
result.extend(post_st.into_iter());
}
Statement::Instruction(ast::Instruction::Ret { data }) => {
for (old_name, new_name, type_) in remap_returns.iter() {
result.push(Statement::Instruction(ast::Instruction::Ld { result.push(Statement::Instruction(ast::Instruction::Ld {
data: ast::LdDetails { data: ast::LdDetails {
qualifier: ast::LdStQualifier::Weak, qualifier: ast::LdStQualifier::Weak,
state_space: ast::StateSpace::Reg, state_space: ast::StateSpace::Param,
caching: ast::LdCacheOperator::Cached, caching: ast::LdCacheOperator::Cached,
typ: type_, typ: type_.clone(),
non_coherent: false, non_coherent: false,
}, },
arguments: ast::LdArgs { arguments: ast::LdArgs {
dst: new_name, dst: *new_name,
src: old_name, src: *old_name,
}, },
})); }));
} }
result.push(statement); result.push(Statement::Instruction(ast::Instruction::Ret { data }));
} }
statement => { statement => {
result.push(statement); result.push(statement);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,181 +0,0 @@
use super::*;
use ptx_parser as ast;
/// Flattens compound operands (immediates, reg+offset, vector members) of
/// every statement into plain registers, materializing helper statements as
/// needed. Statements that carry no flattenable operands are forwarded
/// unchanged.
pub(super) fn run<'a, 'b>(
    func: Vec<TypedStatement>,
    id_def: &'b mut MutableNumericIdResolver<'a>,
) -> Result<Vec<ExpandedStatement>, TranslateError> {
    let mut result = Vec::with_capacity(func.len());
    for statement in func {
        match statement {
            // These statement kinds map 1:1 with no operand rewriting.
            Statement::Label(id) => result.push(Statement::Label(id)),
            Statement::Conditional(bra) => result.push(Statement::Conditional(bra)),
            Statement::LoadVar(details) => result.push(Statement::LoadVar(details)),
            Statement::StoreVar(details) => result.push(Statement::StoreVar(details)),
            Statement::RetValue(d, id) => result.push(Statement::RetValue(d, id)),
            Statement::Conversion(conv) => result.push(Statement::Conversion(conv)),
            Statement::Constant(c) => result.push(Statement::Constant(c)),
            Statement::FunctionPointer(d) => result.push(Statement::FunctionPointer(d)),
            other => {
                // The visitor borrows `result` to emit pre-statements; scope
                // it so the borrow ends before we push the visited statement.
                let (flattened, trailing) = {
                    let mut visitor = FlattenArguments::new(&mut result, id_def);
                    (other.visit_map(&mut visitor)?, visitor.post_stmts)
                };
                result.push(flattened);
                result.extend(trailing);
            }
        }
    }
    Ok(result)
}
/// Operand visitor that lowers non-register operands (immediates and
/// register+offset forms) into plain registers, emitting supporting
/// statements into `func` and deferred ones into `post_stmts`.
struct FlattenArguments<'a, 'b> {
    func: &'b mut Vec<ExpandedStatement>,
    id_def: &'b mut MutableNumericIdResolver<'a>,
    // Statements that must run after the instruction currently being visited.
    post_stmts: Vec<ExpandedStatement>,
}
impl<'a, 'b> FlattenArguments<'a, 'b> {
    fn new(
        func: &'b mut Vec<ExpandedStatement>,
        id_def: &'b mut MutableNumericIdResolver<'a>,
    ) -> Self {
        FlattenArguments {
            func,
            id_def,
            post_stmts: Vec::new(),
        }
    }

    /// Plain registers need no flattening.
    fn reg(&mut self, name: SpirvWord) -> Result<SpirvWord, TranslateError> {
        Ok(name)
    }

    /// Lowers a `reg+offset` operand. For `.reg`-space operands this becomes
    /// a materialized constant plus an integer `add`; for memory-space
    /// operands it becomes a `PtrAccess` with an s64 byte offset.
    fn reg_offset(
        &mut self,
        reg: SpirvWord,
        offset: i32,
        type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
        _is_dst: bool,
    ) -> Result<SpirvWord, TranslateError> {
        let (type_, state_space) = if let Some((type_, state_space)) = type_space {
            (type_, state_space)
        } else {
            return Err(TranslateError::UntypedSymbol);
        };
        if state_space == ast::StateSpace::Reg {
            let (reg_type, reg_space) = self.id_def.get_typed(reg)?;
            if reg_space != ast::StateSpace::Reg {
                return Err(error_mismatched_type());
            }
            let reg_scalar_type = match reg_type {
                ast::Type::Scalar(underlying_type) => underlying_type,
                _ => return Err(error_mismatched_type()),
            };
            let id_constant_stmt = self
                .id_def
                .register_intermediate(reg_type.clone(), ast::StateSpace::Reg);
            self.func.push(Statement::Constant(ConstantDefinition {
                dst: id_constant_stmt,
                typ: reg_scalar_type,
                value: ast::ImmediateValue::S64(offset as i64),
            }));
            // All integer kinds use the same non-saturating integer add (the
            // original code had two arms building identical values);
            // floating-point/predicate kinds are rejected.
            let arith_details = match reg_scalar_type.kind() {
                ast::ScalarKind::Signed | ast::ScalarKind::Unsigned | ast::ScalarKind::Bit => {
                    ast::ArithDetails::Integer(ast::ArithInteger {
                        type_: reg_scalar_type,
                        saturate: false,
                    })
                }
                _ => return Err(error_unreachable()),
            };
            let id_add_result = self.id_def.register_intermediate(reg_type, state_space);
            self.func
                .push(Statement::Instruction(ast::Instruction::Add {
                    data: arith_details,
                    arguments: ast::AddArgs {
                        dst: id_add_result,
                        src1: reg,
                        src2: id_constant_stmt,
                    },
                }));
            Ok(id_add_result)
        } else {
            // Memory-space operand: emit the offset as an s64 constant and
            // fold it into a pointer access.
            let id_constant_stmt = self.id_def.register_intermediate(
                ast::Type::Scalar(ast::ScalarType::S64),
                ast::StateSpace::Reg,
            );
            self.func.push(Statement::Constant(ConstantDefinition {
                dst: id_constant_stmt,
                typ: ast::ScalarType::S64,
                value: ast::ImmediateValue::S64(offset as i64),
            }));
            let dst = self
                .id_def
                .register_intermediate(type_.clone(), state_space);
            self.func.push(Statement::PtrAccess(PtrAccess {
                underlying_type: type_.clone(),
                state_space: state_space,
                dst,
                ptr_src: reg,
                offset_src: id_constant_stmt,
            }));
            Ok(dst)
        }
    }

    /// Materializes an immediate as a typed constant in a fresh register.
    fn immediate(
        &mut self,
        value: ast::ImmediateValue,
        type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
    ) -> Result<SpirvWord, TranslateError> {
        let (scalar_t, state_space) =
            if let Some((ast::Type::Scalar(scalar), state_space)) = type_space {
                (*scalar, state_space)
            } else {
                return Err(TranslateError::UntypedSymbol);
            };
        let id = self
            .id_def
            .register_intermediate(ast::Type::Scalar(scalar_t), state_space);
        self.func.push(Statement::Constant(ConstantDefinition {
            dst: id,
            typ: scalar_t,
            value,
        }));
        Ok(id)
    }
}
impl<'a, 'b> ast::VisitorMap<TypedOperand, SpirvWord, TranslateError> for FlattenArguments<'a, 'b> {
    /// Dispatches each operand form to the matching flattening helper.
    fn visit(
        &mut self,
        args: TypedOperand,
        type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
        is_dst: bool,
        _relaxed_type_check: bool,
    ) -> Result<SpirvWord, TranslateError> {
        match args {
            TypedOperand::Reg(reg) => self.reg(reg),
            TypedOperand::Imm(value) => self.immediate(value, type_space),
            TypedOperand::RegOffset(reg, offset) => {
                self.reg_offset(reg, offset, type_space, is_dst)
            }
            // Vector members must have been lowered by an earlier pass.
            TypedOperand::VecMember(..) => Err(error_unreachable()),
        }
    }

    /// Bare identifiers are already registers; forward unchanged.
    fn visit_ident(
        &mut self,
        name: <TypedOperand as ptx_parser::Operand>::Ident,
        _type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
        _is_dst: bool,
        _relaxed_type_check: bool,
    ) -> Result<<SpirvWord as ptx_parser::Operand>::Ident, TranslateError> {
        self.reg(name)
    }
}

View File

@ -189,15 +189,12 @@ impl<'a, 'input> FlattenArguments<'a, 'input> {
fn vec_member( fn vec_member(
&mut self, &mut self,
vector_src: SpirvWord, vector_ident: SpirvWord,
member: u8, member: u8,
_type_space: Option<(&ast::Type, ast::StateSpace)>, _type_space: Option<(&ast::Type, ast::StateSpace)>,
is_dst: bool, is_dst: bool,
) -> Result<SpirvWord, TranslateError> { ) -> Result<SpirvWord, TranslateError> {
if is_dst { let (vector_width, scalar_type, space) = match self.resolver.get_typed(vector_ident)? {
return Err(error_mismatched_type());
}
let (vector_width, scalar_type, space) = match self.resolver.get_typed(vector_src)? {
(ast::Type::Vector(vector_width, scalar_t), space) => { (ast::Type::Vector(vector_width, scalar_t), space) => {
(*vector_width, *scalar_t, *space) (*vector_width, *scalar_t, *space)
} }
@ -206,35 +203,46 @@ impl<'a, 'input> FlattenArguments<'a, 'input> {
let temporary = self let temporary = self
.resolver .resolver
.register_unnamed(Some((scalar_type.into(), space))); .register_unnamed(Some((scalar_type.into(), space)));
self.result.push(Statement::VectorAccess(VectorAccess { if is_dst {
scalar_type, self.post_stmts.push(Statement::VectorWrite(VectorWrite {
vector_width, scalar_type,
dst: temporary, vector_width,
src: vector_src, vector_dst: vector_ident,
member: member, vector_src: vector_ident,
})); scalar_src: temporary,
member,
}));
} else {
self.result.push(Statement::VectorRead(VectorRead {
scalar_type,
vector_width,
scalar_dst: temporary,
vector_src: vector_ident,
member,
}));
}
Ok(temporary) Ok(temporary)
} }
fn vec_pack( fn vec_pack(
&mut self, &mut self,
vecs: Vec<SpirvWord>, vector_elements: Vec<SpirvWord>,
type_space: Option<(&ast::Type, ast::StateSpace)>, type_space: Option<(&ast::Type, ast::StateSpace)>,
is_dst: bool, is_dst: bool,
relaxed_type_check: bool, relaxed_type_check: bool,
) -> Result<SpirvWord, TranslateError> { ) -> Result<SpirvWord, TranslateError> {
let (scalar_t, state_space) = match type_space { let (width, scalar_t, state_space) = match type_space {
Some((ast::Type::Vector(_, scalar_t), space)) => (*scalar_t, space), Some((ast::Type::Vector(width, scalar_t), space)) => (*width, *scalar_t, space),
_ => return Err(error_mismatched_type()), _ => return Err(error_mismatched_type()),
}; };
let temp_vec = self let temporary_vector = self
.resolver .resolver
.register_unnamed(Some((scalar_t.into(), state_space))); .register_unnamed(Some((ast::Type::Vector(width, scalar_t), state_space)));
let statement = Statement::RepackVector(RepackVectorDetails { let statement = Statement::RepackVector(RepackVectorDetails {
is_extract: is_dst, is_extract: is_dst,
typ: scalar_t, typ: scalar_t,
packed: temp_vec, packed: temporary_vector,
unpacked: vecs, unpacked: vector_elements,
relaxed_type_check, relaxed_type_check,
}); });
if is_dst { if is_dst {
@ -242,7 +250,7 @@ impl<'a, 'input> FlattenArguments<'a, 'input> {
} else { } else {
self.result.push(statement); self.result.push(statement);
} }
Ok(temp_vec) Ok(temporary_vector)
} }
} }
@ -273,7 +281,7 @@ impl<'a, 'b> ast::VisitorMap<ast::ParsedOperand<SpirvWord>, SpirvWord, Translate
fn visit_ident( fn visit_ident(
&mut self, &mut self,
name: <TypedOperand as ast::Operand>::Ident, name: SpirvWord,
_type_space: Option<(&ast::Type, ast::StateSpace)>, _type_space: Option<(&ast::Type, ast::StateSpace)>,
_is_dst: bool, _is_dst: bool,
_relaxed_type_check: bool, _relaxed_type_check: bool,

View File

@ -1,281 +0,0 @@
use super::*;
/// Splits statements into local (per-function) and global (module-level)
/// parts, and rewrites instructions with no direct lowering (bfe, bfi, brev,
/// activemask, and the atom inc/dec/float-add forms) into calls to ZLUDA PTX
/// helper functions, registering each helper in `ptx_impl_imports`.
pub(super) fn run<'input, 'b>(
    sorted_statements: Vec<ExpandedStatement>,
    ptx_impl_imports: &mut HashMap<String, Directive>,
    id_def: &mut NumericIdResolver,
) -> Result<(Vec<ExpandedStatement>, Vec<ast::Variable<SpirvWord>>), TranslateError> {
    let mut local = Vec::with_capacity(sorted_statements.len());
    let mut global = Vec::new();
    for statement in sorted_statements {
        match statement {
            // .shared and .global variables are hoisted to module scope.
            Statement::Variable(
                var @ ast::Variable {
                    state_space: ast::StateSpace::Shared,
                    ..
                },
            )
            | Statement::Variable(
                var @ ast::Variable {
                    state_space: ast::StateSpace::Global,
                    ..
                },
            ) => global.push(var),
            // Bit-field extract: helper name is suffixed with the data type.
            Statement::Instruction(ast::Instruction::Bfe { data, arguments }) => {
                let fn_name = [ZLUDA_PTX_PREFIX, "bfe_", scalar_to_ptx_name(data)].concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Bfe { data, arguments },
                    fn_name,
                )?);
            }
            // Bit-field insert.
            Statement::Instruction(ast::Instruction::Bfi { data, arguments }) => {
                let fn_name = [ZLUDA_PTX_PREFIX, "bfi_", scalar_to_ptx_name(data)].concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Bfi { data, arguments },
                    fn_name,
                )?);
            }
            // Bit reverse.
            Statement::Instruction(ast::Instruction::Brev { data, arguments }) => {
                let fn_name: String =
                    [ZLUDA_PTX_PREFIX, "brev_", scalar_to_ptx_name(data)].concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Brev { data, arguments },
                    fn_name,
                )?);
            }
            // Active-thread mask query (no type suffix).
            Statement::Instruction(ast::Instruction::Activemask { arguments }) => {
                let fn_name = [ZLUDA_PTX_PREFIX, "activemask"].concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Activemask { arguments },
                    fn_name,
                )?);
            }
            // atom.inc: helper name encodes semantics, scope and space.
            Statement::Instruction(ast::Instruction::Atom {
                data:
                    data @ ast::AtomDetails {
                        op: ast::AtomicOp::IncrementWrap,
                        semantics,
                        scope,
                        space,
                        ..
                    },
                arguments,
            }) => {
                let fn_name = [
                    ZLUDA_PTX_PREFIX,
                    "atom_",
                    semantics_to_ptx_name(semantics),
                    "_",
                    scope_to_ptx_name(scope),
                    "_",
                    space_to_ptx_name(space),
                    "_inc",
                ]
                .concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Atom { data, arguments },
                    fn_name,
                )?);
            }
            // atom.dec: same naming scheme as atom.inc.
            Statement::Instruction(ast::Instruction::Atom {
                data:
                    data @ ast::AtomDetails {
                        op: ast::AtomicOp::DecrementWrap,
                        semantics,
                        scope,
                        space,
                        ..
                    },
                arguments,
            }) => {
                let fn_name = [
                    ZLUDA_PTX_PREFIX,
                    "atom_",
                    semantics_to_ptx_name(semantics),
                    "_",
                    scope_to_ptx_name(scope),
                    "_",
                    space_to_ptx_name(space),
                    "_dec",
                ]
                .concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Atom { data, arguments },
                    fn_name,
                )?);
            }
            // atom.add on floats: the helper is additionally suffixed with the
            // scalar type; non-scalar atom types are impossible here.
            Statement::Instruction(ast::Instruction::Atom {
                data:
                    data @ ast::AtomDetails {
                        op: ast::AtomicOp::FloatAdd,
                        semantics,
                        scope,
                        space,
                        ..
                    },
                arguments,
            }) => {
                let scalar_type = match data.type_ {
                    ptx_parser::Type::Scalar(scalar) => scalar,
                    _ => return Err(error_unreachable()),
                };
                let fn_name = [
                    ZLUDA_PTX_PREFIX,
                    "atom_",
                    semantics_to_ptx_name(semantics),
                    "_",
                    scope_to_ptx_name(scope),
                    "_",
                    space_to_ptx_name(space),
                    "_add_",
                    scalar_to_ptx_name(scalar_type),
                ]
                .concat();
                local.push(instruction_to_fn_call(
                    id_def,
                    ptx_impl_imports,
                    ast::Instruction::Atom { data, arguments },
                    fn_name,
                )?);
            }
            // Everything else stays in the function body unchanged.
            s => local.push(s),
        }
    }
    Ok((local, global))
}
/// Converts an instruction into a call to the named ZLUDA helper function.
/// Every operand of the original instruction becomes a call argument:
/// destination operands become return arguments, sources become inputs.
fn instruction_to_fn_call(
    id_defs: &mut NumericIdResolver,
    ptx_impl_imports: &mut HashMap<String, Directive>,
    inst: ast::Instruction<SpirvWord>,
    fn_name: String,
) -> Result<ExpandedStatement, TranslateError> {
    let mut arguments = Vec::new();
    // Collect (operand, is_dst, type, space) for every operand. The visited
    // instruction itself is discarded, so the SpirvWord(0) placeholder
    // returned to the visitor is never observed.
    ast::visit_map(inst, &mut |operand,
                               type_space: Option<(
        &ast::Type,
        ast::StateSpace,
    )>,
                               is_dst,
                               _| {
        let (typ, space) = match type_space {
            Some((typ, space)) => (typ.clone(), space),
            None => return Err(error_unreachable()),
        };
        arguments.push((operand, is_dst, typ, space));
        Ok(SpirvWord(0))
    })?;
    // Destination operands are visited first, so the first non-dst operand
    // marks the boundary between return and input arguments.
    // NOTE(review): `desc` is bound but unused in the predicate below.
    let return_arguments_count = arguments
        .iter()
        .position(|(desc, is_dst, _, _)| !is_dst)
        .unwrap_or(arguments.len());
    let (return_arguments, input_arguments) = arguments.split_at(return_arguments_count);
    // Declare (or reuse) the external helper with a matching signature.
    let fn_id = register_external_fn_call(
        id_defs,
        ptx_impl_imports,
        fn_name,
        return_arguments
            .iter()
            .map(|(_, _, typ, state)| (typ, *state)),
        input_arguments
            .iter()
            .map(|(_, _, typ, state)| (typ, *state)),
    )?;
    Ok(Statement::Instruction(ast::Instruction::Call {
        data: ast::CallDetails {
            uniform: false,
            return_arguments: return_arguments
                .iter()
                .map(|(_, _, typ, state)| (typ.clone(), *state))
                .collect::<Vec<_>>(),
            input_arguments: input_arguments
                .iter()
                .map(|(_, _, typ, state)| (typ.clone(), *state))
                .collect::<Vec<_>>(),
        },
        arguments: ast::CallArgs {
            return_arguments: return_arguments
                .iter()
                .map(|(name, _, _, _)| *name)
                .collect::<Vec<_>>(),
            func: fn_id,
            input_arguments: input_arguments
                .iter()
                .map(|(name, _, _, _)| *name)
                .collect::<Vec<_>>(),
        },
    }))
}
/// Maps a scalar type to the PTX type-suffix spelling used when building
/// helper-function names (e.g. `__zluda_ptx_impl_bfe_u32`).
fn scalar_to_ptx_name(this: ast::ScalarType) -> &'static str {
    match this {
        ast::ScalarType::B8 => "b8",
        ast::ScalarType::B16 => "b16",
        ast::ScalarType::B32 => "b32",
        ast::ScalarType::B64 => "b64",
        ast::ScalarType::B128 => "b128",
        ast::ScalarType::U8 => "u8",
        ast::ScalarType::U16 => "u16",
        ast::ScalarType::U16x2 => "u16x2",
        ast::ScalarType::U32 => "u32",
        ast::ScalarType::U64 => "u64",
        ast::ScalarType::S8 => "s8",
        ast::ScalarType::S16 => "s16",
        ast::ScalarType::S16x2 => "s16x2",
        ast::ScalarType::S32 => "s32",
        ast::ScalarType::S64 => "s64",
        ast::ScalarType::F16 => "f16",
        ast::ScalarType::F16x2 => "f16x2",
        ast::ScalarType::F32 => "f32",
        ast::ScalarType::F64 => "f64",
        ast::ScalarType::BF16 => "bf16",
        ast::ScalarType::BF16x2 => "bf16x2",
        ast::ScalarType::Pred => "pred",
    }
}
/// Maps an atomic memory-ordering semantics to its spelling in helper names.
fn semantics_to_ptx_name(this: ast::AtomSemantics) -> &'static str {
    match this {
        ast::AtomSemantics::Relaxed => "relaxed",
        ast::AtomSemantics::Acquire => "acquire",
        ast::AtomSemantics::Release => "release",
        ast::AtomSemantics::AcqRel => "acq_rel",
    }
}
/// Maps an atomic memory scope to its spelling in helper names.
fn scope_to_ptx_name(this: ast::MemScope) -> &'static str {
    match this {
        ast::MemScope::Cta => "cta",
        ast::MemScope::Gpu => "gpu",
        ast::MemScope::Sys => "sys",
        ast::MemScope::Cluster => "cluster",
    }
}
/// Maps a state space to its spelling in helper names.
fn space_to_ptx_name(this: ast::StateSpace) -> &'static str {
    match this {
        ast::StateSpace::Generic => "generic",
        ast::StateSpace::Global => "global",
        ast::StateSpace::Shared => "shared",
        ast::StateSpace::Reg => "reg",
        ast::StateSpace::Const => "const",
        ast::StateSpace::Local => "local",
        ast::StateSpace::Param => "param",
        ast::StateSpace::SharedCluster => "shared_cluster",
        ast::StateSpace::ParamEntry => "param_entry",
        ast::StateSpace::SharedCta => "shared_cta",
        ast::StateSpace::ParamFunc => "param_func",
    }
}

View File

@ -1,130 +0,0 @@
use super::*;
use std::collections::HashMap;
/// Replaces every read of a special register with a call to the matching
/// ZLUDA helper function, registering the helper as an external import on
/// first use.
pub(super) fn run<'a, 'b, 'input>(
    ptx_impl_imports: &'a mut HashMap<String, Directive<'input>>,
    typed_statements: Vec<TypedStatement>,
    numeric_id_defs: &'a mut NumericIdResolver<'b>,
) -> Result<Vec<TypedStatement>, TranslateError> {
    let mut resolver = SpecialRegisterResolver {
        ptx_impl_imports,
        numeric_id_defs,
        result: Vec::with_capacity(typed_statements.len()),
    };
    for statement in typed_statements {
        // Visiting may append helper-call statements to `resolver.result`
        // before the rewritten statement itself is pushed.
        let rewritten = statement.visit_map(&mut resolver)?;
        resolver.result.push(rewritten);
    }
    Ok(resolver.result)
}
/// Visitor state for the special-register replacement pass.
struct SpecialRegisterResolver<'a, 'b, 'input> {
    // Helper functions imported so far, keyed by name.
    ptx_impl_imports: &'a mut HashMap<String, Directive<'input>>,
    numeric_id_defs: &'a mut NumericIdResolver<'b>,
    // Rewritten statement stream; helper-call statements are appended here.
    result: Vec<TypedStatement>,
}
impl<'a, 'b, 'input> ast::VisitorMap<TypedOperand, TypedOperand, TranslateError>
    for SpecialRegisterResolver<'a, 'b, 'input>
{
    /// Rewrites an operand, forwarding any vector component index (e.g. the
    /// `.y` in `%tid.y`) to the replacement logic.
    fn visit(
        &mut self,
        operand: TypedOperand,
        _type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
        is_dst: bool,
        _relaxed_type_check: bool,
    ) -> Result<TypedOperand, TranslateError> {
        operand.map(|name, vector_index| self.replace_sreg(name, is_dst, vector_index))
    }

    /// Bare identifiers carry no component index.
    fn visit_ident(
        &mut self,
        args: SpirvWord,
        _type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
        is_dst: bool,
        _relaxed_type_check: bool,
    ) -> Result<SpirvWord, TranslateError> {
        self.replace_sreg(args, is_dst, None)
    }
}
impl<'a, 'b, 'input> SpecialRegisterResolver<'a, 'b, 'input> {
    /// If `name` denotes a special register, replaces the read with a call to
    /// the matching ZLUDA helper function and returns the register holding
    /// the call result; otherwise returns `name` unchanged.
    ///
    /// Special registers are read-only, so `is_dst == true` is an error.
    /// `vector_index` selects a component (e.g. `%tid.y`) and is passed to
    /// the helper as an extra argument when the helper declares one.
    fn replace_sreg(
        &mut self,
        name: SpirvWord,
        is_dst: bool,
        vector_index: Option<u8>,
    ) -> Result<SpirvWord, TranslateError> {
        if let Some(sreg) = self.numeric_id_defs.special_registers.get(name) {
            if is_dst {
                return Err(error_mismatched_type());
            }
            // Build the helper's input arguments: the component index, when a
            // vector index was used and the helper expects one.
            let input_arguments = match (vector_index, sreg.get_function_input_type()) {
                (Some(idx), Some(inp_type)) => {
                    if inp_type != ast::ScalarType::U8 {
                        // NOTE(review): sibling code paths use
                        // error_unreachable(); kept as-is here to preserve the
                        // exact error value — confirm whether they differ.
                        return Err(TranslateError::Unreachable);
                    }
                    let constant = self.numeric_id_defs.register_intermediate(Some((
                        ast::Type::Scalar(inp_type),
                        ast::StateSpace::Reg,
                    )));
                    self.result.push(Statement::Constant(ConstantDefinition {
                        dst: constant,
                        typ: inp_type,
                        value: ast::ImmediateValue::U64(idx as u64),
                    }));
                    vec![(
                        TypedOperand::Reg(constant),
                        ast::Type::Scalar(inp_type),
                        ast::StateSpace::Reg,
                    )]
                }
                (None, None) => Vec::new(),
                // Component read of a scalar sreg, or vice versa.
                _ => return Err(error_mismatched_type()),
            };
            let ocl_fn_name = [ZLUDA_PTX_PREFIX, sreg.get_unprefixed_function_name()].concat();
            let return_type = sreg.get_function_return_type();
            let fn_result = self.numeric_id_defs.register_intermediate(Some((
                ast::Type::Scalar(return_type),
                ast::StateSpace::Reg,
            )));
            let return_arguments = vec![(
                fn_result,
                ast::Type::Scalar(return_type),
                ast::StateSpace::Reg,
            )];
            let fn_call = register_external_fn_call(
                self.numeric_id_defs,
                self.ptx_impl_imports,
                // `ocl_fn_name` is already an owned String; pass it by value
                // instead of re-allocating via `.to_string()`.
                ocl_fn_name,
                return_arguments.iter().map(|(_, typ, space)| (typ, *space)),
                input_arguments.iter().map(|(_, typ, space)| (typ, *space)),
            )?;
            let data = ast::CallDetails {
                uniform: false,
                return_arguments: return_arguments
                    .iter()
                    .map(|(_, typ, space)| (typ.clone(), *space))
                    .collect(),
                input_arguments: input_arguments
                    .iter()
                    .map(|(_, typ, space)| (typ.clone(), *space))
                    .collect(),
            };
            let arguments = ast::CallArgs {
                return_arguments: return_arguments.iter().map(|(name, _, _)| *name).collect(),
                func: fn_call,
                input_arguments: input_arguments.iter().map(|(name, _, _)| *name).collect(),
            };
            self.result
                .push(Statement::Instruction(ast::Instruction::Call {
                    data,
                    arguments,
                }));
            Ok(fn_result)
        } else {
            Ok(name)
        }
    }
}

View File

@ -31,10 +31,10 @@ pub(super) fn run<'a, 'input>(
sreg_to_function, sreg_to_function,
result: Vec::new(), result: Vec::new(),
}; };
directives for directive in directives.into_iter() {
.into_iter() result.push(run_directive(&mut visitor, directive)?);
.map(|directive| run_directive(&mut visitor, directive)) }
.collect::<Result<Vec<_>, _>>() Ok(result)
} }
fn run_directive<'a, 'input>( fn run_directive<'a, 'input>(
@ -112,7 +112,7 @@ impl<'a, 'b, 'input>
is_dst: bool, is_dst: bool,
_relaxed_type_check: bool, _relaxed_type_check: bool,
) -> Result<SpirvWord, TranslateError> { ) -> Result<SpirvWord, TranslateError> {
self.replace_sreg(args, None, is_dst) Ok(self.replace_sreg(args, None, is_dst)?.unwrap_or(args))
} }
} }
@ -122,7 +122,7 @@ impl<'a, 'b, 'input> SpecialRegisterResolver<'a, 'input> {
name: SpirvWord, name: SpirvWord,
vector_index: Option<u8>, vector_index: Option<u8>,
is_dst: bool, is_dst: bool,
) -> Result<SpirvWord, TranslateError> { ) -> Result<Option<SpirvWord>, TranslateError> {
if let Some(sreg) = self.special_registers.get(name) { if let Some(sreg) = self.special_registers.get(name) {
if is_dst { if is_dst {
return Err(error_mismatched_type()); return Err(error_mismatched_type());
@ -179,30 +179,33 @@ impl<'a, 'b, 'input> SpecialRegisterResolver<'a, 'input> {
data, data,
arguments, arguments,
})); }));
Ok(fn_result) Ok(Some(fn_result))
} else { } else {
Ok(name) Ok(None)
} }
} }
} }
pub fn map_operand<T, U, Err>( pub fn map_operand<T: Copy, Err>(
this: ast::ParsedOperand<T>, this: ast::ParsedOperand<T>,
fn_: &mut impl FnMut(T, Option<u8>) -> Result<U, Err>, fn_: &mut impl FnMut(T, Option<u8>) -> Result<Option<T>, Err>,
) -> Result<ast::ParsedOperand<U>, Err> { ) -> Result<ast::ParsedOperand<T>, Err> {
Ok(match this { Ok(match this {
ast::ParsedOperand::Reg(ident) => ast::ParsedOperand::Reg(fn_(ident, None)?), ast::ParsedOperand::Reg(ident) => {
ast::ParsedOperand::Reg(fn_(ident, None)?.unwrap_or(ident))
}
ast::ParsedOperand::RegOffset(ident, offset) => { ast::ParsedOperand::RegOffset(ident, offset) => {
ast::ParsedOperand::RegOffset(fn_(ident, None)?, offset) ast::ParsedOperand::RegOffset(fn_(ident, None)?.unwrap_or(ident), offset)
} }
ast::ParsedOperand::Imm(imm) => ast::ParsedOperand::Imm(imm), ast::ParsedOperand::Imm(imm) => ast::ParsedOperand::Imm(imm),
ast::ParsedOperand::VecMember(ident, member) => { ast::ParsedOperand::VecMember(ident, member) => match fn_(ident, Some(member))? {
ast::ParsedOperand::Reg(fn_(ident, Some(member))?) Some(ident) => ast::ParsedOperand::Reg(ident),
} None => ast::ParsedOperand::VecMember(ident, member),
},
ast::ParsedOperand::VecPack(idents) => ast::ParsedOperand::VecPack( ast::ParsedOperand::VecPack(idents) => ast::ParsedOperand::VecPack(
idents idents
.into_iter() .into_iter()
.map(|ident| fn_(ident, None)) .map(|ident| Ok(fn_(ident, None)?.unwrap_or(ident)))
.collect::<Result<Vec<_>, _>>()?, .collect::<Result<Vec<_>, _>>()?,
), ),
}) })

View File

@ -5,7 +5,7 @@ pub(super) fn run<'input>(
) -> Result<Vec<Directive2<'input, ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> { ) -> Result<Vec<Directive2<'input, ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
let mut result = Vec::with_capacity(directives.len()); let mut result = Vec::with_capacity(directives.len());
for mut directive in directives.into_iter() { for mut directive in directives.into_iter() {
run_directive(&mut result, &mut directive); run_directive(&mut result, &mut directive)?;
result.push(directive); result.push(directive);
} }
Ok(result) Ok(result)

View File

@ -1,7 +1,4 @@
use super::*; use super::*;
use ptx_parser::VisitorMap;
use rustc_hash::FxHashSet;
// This pass: // This pass:
// * Turns all .local, .param and .reg in-body variables into .local variables // * Turns all .local, .param and .reg in-body variables into .local variables
// (if _not_ an input method argument) // (if _not_ an input method argument)
@ -40,9 +37,6 @@ fn run_method<'a, 'input>(
method: Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>, method: Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>,
) -> Result<Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> { ) -> Result<Function2<'input, ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
let mut func_decl = method.func_decl; let mut func_decl = method.func_decl;
for arg in func_decl.return_arguments.iter_mut() {
visitor.visit_variable(arg)?;
}
let is_kernel = func_decl.name.is_kernel(); let is_kernel = func_decl.name.is_kernel();
if is_kernel { if is_kernel {
for arg in func_decl.input_arguments.iter_mut() { for arg in func_decl.input_arguments.iter_mut() {
@ -52,17 +46,21 @@ fn run_method<'a, 'input>(
let new_name = visitor let new_name = visitor
.resolver .resolver
.register_unnamed(Some((arg.v_type.clone(), new_space))); .register_unnamed(Some((arg.v_type.clone(), new_space)));
visitor.input_argument(old_name, new_name, old_space); visitor.input_argument(old_name, new_name, old_space)?;
arg.name = new_name; arg.name = new_name;
arg.state_space = new_space; arg.state_space = new_space;
} }
}; };
for arg in func_decl.return_arguments.iter_mut() {
visitor.visit_variable(arg)?;
}
let return_arguments = &func_decl.return_arguments[..];
let body = method let body = method
.body .body
.map(move |statements| { .map(move |statements| {
let mut result = Vec::with_capacity(statements.len()); let mut result = Vec::with_capacity(statements.len());
for statement in statements { for statement in statements {
run_statement(&mut visitor, &mut result, statement)?; run_statement(&mut visitor, return_arguments, &mut result, statement)?;
} }
Ok::<_, TranslateError>(result) Ok::<_, TranslateError>(result)
}) })
@ -79,10 +77,33 @@ fn run_method<'a, 'input>(
fn run_statement<'a, 'input>( fn run_statement<'a, 'input>(
visitor: &mut InsertMemSSAVisitor<'a, 'input>, visitor: &mut InsertMemSSAVisitor<'a, 'input>,
return_arguments: &[ast::Variable<SpirvWord>],
result: &mut Vec<ExpandedStatement>, result: &mut Vec<ExpandedStatement>,
statement: ExpandedStatement, statement: ExpandedStatement,
) -> Result<(), TranslateError> { ) -> Result<(), TranslateError> {
match statement { match statement {
Statement::Instruction(ast::Instruction::Ret { data }) => {
let statement = if return_arguments.is_empty() {
Statement::Instruction(ast::Instruction::Ret { data })
} else {
Statement::RetValue(
data,
return_arguments
.iter()
.map(|arg| {
if arg.state_space != ast::StateSpace::Local {
return Err(error_unreachable());
}
Ok((arg.name, arg.v_type.clone()))
})
.collect::<Result<Vec<_>, _>>()?,
)
};
let new_statement = statement.visit_map(visitor)?;
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
result.push(new_statement);
result.extend(visitor.post.drain(..).map(Statement::Instruction));
}
Statement::Variable(mut var) => { Statement::Variable(mut var) => {
visitor.visit_variable(&mut var)?; visitor.visit_variable(&mut var)?;
result.push(Statement::Variable(var)); result.push(Statement::Variable(var));
@ -154,7 +175,7 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
old_name: SpirvWord, old_name: SpirvWord,
new_name: SpirvWord, new_name: SpirvWord,
old_space: ast::StateSpace, old_space: ast::StateSpace,
) -> Result<(), TranslateError> { ) -> Result<bool, TranslateError> {
Ok(match old_space { Ok(match old_space {
ast::StateSpace::Reg => { ast::StateSpace::Reg => {
self.variables.insert( self.variables.insert(
@ -164,6 +185,7 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
type_: type_.clone(), type_: type_.clone(),
}, },
); );
true
} }
ast::StateSpace::Param => { ast::StateSpace::Param => {
self.variables.insert( self.variables.insert(
@ -174,19 +196,18 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
name: new_name, name: new_name,
}, },
); );
true
} }
// Good as-is // Good as-is
ast::StateSpace::Local => {} ast::StateSpace::Local
// Will be pulled into global scope later | ast::StateSpace::Generic
ast::StateSpace::Generic
| ast::StateSpace::SharedCluster | ast::StateSpace::SharedCluster
| ast::StateSpace::Global | ast::StateSpace::Global
| ast::StateSpace::Const | ast::StateSpace::Const
| ast::StateSpace::SharedCta | ast::StateSpace::SharedCta
| ast::StateSpace::Shared => {} | ast::StateSpace::Shared
ast::StateSpace::ParamEntry | ast::StateSpace::ParamFunc => { | ast::StateSpace::ParamEntry
return Err(error_unreachable()) | ast::StateSpace::ParamFunc => return Err(error_unreachable()),
}
}) })
} }
@ -239,17 +260,28 @@ impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
} }
fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> { fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> {
if var.state_space != ast::StateSpace::Local { let old_space = match var.state_space {
let old_name = var.name; space @ (ptx_parser::StateSpace::Reg | ptx_parser::StateSpace::Param) => space,
let old_space = var.state_space; // Do nothing
let new_space = ast::StateSpace::Local; ptx_parser::StateSpace::Local => return Ok(()),
let new_name = self // Handled by another pass
.resolver ptx_parser::StateSpace::Generic
.register_unnamed(Some((var.v_type.clone(), new_space))); | ptx_parser::StateSpace::SharedCluster
self.variable(&var.v_type, old_name, new_name, old_space)?; | ptx_parser::StateSpace::ParamEntry
var.name = new_name; | ptx_parser::StateSpace::Global
var.state_space = new_space; | ptx_parser::StateSpace::SharedCta
} | ptx_parser::StateSpace::Const
| ptx_parser::StateSpace::Shared
| ptx_parser::StateSpace::ParamFunc => return Ok(()),
};
let old_name = var.name;
let new_space = ast::StateSpace::Local;
let new_name = self
.resolver
.register_unnamed(Some((var.v_type.clone(), new_space)));
self.variable(&var.v_type, old_name, new_name, old_space)?;
var.name = new_name;
var.state_space = new_space;
Ok(()) Ok(())
} }
} }
@ -260,9 +292,9 @@ impl<'a, 'input> ast::VisitorMap<SpirvWord, SpirvWord, TranslateError>
fn visit( fn visit(
&mut self, &mut self,
ident: SpirvWord, ident: SpirvWord,
type_space: Option<(&ast::Type, ast::StateSpace)>, _type_space: Option<(&ast::Type, ast::StateSpace)>,
is_dst: bool, is_dst: bool,
relaxed_type_check: bool, _relaxed_type_check: bool,
) -> Result<SpirvWord, TranslateError> { ) -> Result<SpirvWord, TranslateError> {
if let Some(remap) = self.variables.get(&ident) { if let Some(remap) = self.variables.get(&ident) {
match remap { match remap {

View File

@ -1,438 +0,0 @@
use std::mem;
use super::*;
use ptx_parser as ast;
/*
There are several kinds of implicit conversions in PTX:
* auto-bitcast: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#type-information-for-instructions-and-operands
* special ld/st/cvt conversion rules: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operand-size-exceeding-instruction-type-size
- ld.param: not documented, but for instruction `ld.param.<type> x, [y]`,
semantics are to first zext/chop/bitcast `y` as needed and then do
documented special ld/st/cvt conversion rules for destination operands
- st.param [x] y (used as function return arguments) same rule as above applies
- generic/global ld: for instruction `ld x, [y]`, y must be of type
b64/u64/s64, which is bitcast to a pointer, dereferenced and then
documented special ld/st/cvt conversion rules are applied to dst
- generic/global st: for instruction `st [x], y`, x must be of type
b64/u64/s64, which is bitcast to a pointer
*/
/// Walks the function body and inserts implicit conversion statements around
/// every statement kind whose operands can require them; all other statement
/// kinds are copied through unchanged.
pub(super) fn run(
    func: Vec<ExpandedStatement>,
    id_def: &mut MutableNumericIdResolver,
) -> Result<Vec<ExpandedStatement>, TranslateError> {
    let mut result = Vec::with_capacity(func.len());
    for statement in func.into_iter() {
        match statement {
            // These four carry operands that may need conversions inserted
            // before/after them.
            convertible @ (Statement::Instruction(_)
            | Statement::PtrAccess(_)
            | Statement::RepackVector(_)
            | Statement::VectorAccess(_)) => {
                insert_implicit_conversions_impl(&mut result, id_def, convertible)?;
            }
            // Everything else passes through untouched. Listed explicitly so
            // adding a new Statement variant forces a decision here.
            passthrough @ (Statement::Conditional(_)
            | Statement::Conversion(_)
            | Statement::Label(_)
            | Statement::Constant(_)
            | Statement::Variable(_)
            | Statement::LoadVar(..)
            | Statement::StoreVar(..)
            | Statement::RetValue(..)
            | Statement::FunctionPointer(..)) => result.push(passthrough),
        }
    }
    Ok(result)
}
/// Visits one statement's operands; whenever an operand's (type, space)
/// disagrees with what the instruction expects, replaces the operand with a
/// fresh intermediate and emits an `ImplicitConversion` — before the statement
/// for sources, after it (via `post_conv`) for destinations.
fn insert_implicit_conversions_impl(
    func: &mut Vec<ExpandedStatement>,
    id_def: &mut MutableNumericIdResolver,
    stmt: ExpandedStatement,
) -> Result<(), TranslateError> {
    // Conversions of destination operands must run after the statement.
    let mut post_conv = Vec::new();
    let statement = stmt.visit_map::<SpirvWord, TranslateError>(
        &mut |operand,
              type_state: Option<(&ast::Type, ast::StateSpace)>,
              is_dst,
              relaxed_type_check| {
            // Operands with no declared type/space never need conversion.
            let (instr_type, instruction_space) = match type_state {
                None => return Ok(operand),
                Some(t) => t,
            };
            let (operand_type, operand_space) = id_def.get_typed(operand)?;
            // Relaxed checking (ld/st/cvt-style rules) differs for source vs
            // destination operands; otherwise use the default rules.
            let conversion_fn = if relaxed_type_check {
                if is_dst {
                    should_convert_relaxed_dst_wrapper
                } else {
                    should_convert_relaxed_src_wrapper
                }
            } else {
                default_implicit_conversion
            };
            match conversion_fn(
                (operand_space, &operand_type),
                (instruction_space, instr_type),
            )? {
                Some(conv_kind) => {
                    let conv_output = if is_dst { &mut post_conv } else { &mut *func };
                    // Built for the dst case (instruction result -> operand);
                    // for src operands the swaps below flip the direction.
                    let mut from_type = instr_type.clone();
                    let mut from_space = instruction_space;
                    let mut to_type = operand_type;
                    let mut to_space = operand_space;
                    let mut src =
                        id_def.register_intermediate(instr_type.clone(), instruction_space);
                    let mut dst = operand;
                    // The statement itself always uses the fresh intermediate.
                    let result = Ok::<_, TranslateError>(src);
                    if !is_dst {
                        mem::swap(&mut src, &mut dst);
                        mem::swap(&mut from_type, &mut to_type);
                        mem::swap(&mut from_space, &mut to_space);
                    }
                    conv_output.push(Statement::Conversion(ImplicitConversion {
                        src,
                        dst,
                        from_type,
                        from_space,
                        to_type,
                        to_space,
                        kind: conv_kind,
                    }));
                    result
                }
                None => Ok(operand),
            }
        },
    )?;
    func.push(statement);
    // Destination conversions follow the statement that produced the value.
    func.append(&mut post_conv);
    Ok(())
}
/// Default (non-relaxed) implicit conversion rules: decides which
/// `ConversionKind`, if any, converts an operand's (space, type) to what the
/// instruction expects. Returns `Ok(None)` when no conversion is needed.
pub(crate) fn default_implicit_conversion(
    (operand_space, operand_type): (ast::StateSpace, &ast::Type),
    (instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
) -> Result<Option<ConversionKind>, TranslateError> {
    if instruction_space == ast::StateSpace::Reg {
        if operand_space == ast::StateSpace::Reg {
            // Vector operand used as a same-width bit-typed scalar: plain
            // bitcast (e.g. v2.u32 read as b64).
            if let (ast::Type::Vector(vec_len, vec_underlying_type), ast::Type::Scalar(scalar)) =
                (operand_type, instruction_type)
            {
                if scalar.kind() == ast::ScalarKind::Bit
                    && scalar.size_of() == (vec_underlying_type.size_of() * vec_len)
                {
                    return Ok(Some(ConversionKind::Default));
                }
            }
        } else if is_addressable(operand_space) {
            // Memory-resident operand used where a register is expected:
            // take its address.
            return Ok(Some(ConversionKind::AddressOf));
        }
    }
    if instruction_space != operand_space {
        default_implicit_conversion_space(
            (operand_space, operand_type),
            (instruction_space, instruction_type),
        )
    } else if instruction_type != operand_type {
        default_implicit_conversion_type(instruction_space, operand_type, instruction_type)
    } else {
        // Same space, same type: nothing to do.
        Ok(None)
    }
}
fn is_addressable(this: ast::StateSpace) -> bool {
match this {
ast::StateSpace::Const
| ast::StateSpace::Generic
| ast::StateSpace::Global
| ast::StateSpace::Local
| ast::StateSpace::Shared => true,
ast::StateSpace::Param | ast::StateSpace::Reg => false,
ast::StateSpace::SharedCluster
| ast::StateSpace::SharedCta
| ast::StateSpace::ParamEntry
| ast::StateSpace::ParamFunc => todo!(),
}
}
// Space is different
/// Conversion rules when operand and instruction disagree on state space:
/// generic<->specific coercions, pointer-typed registers, and bit-typed
/// integers used as raw addresses.
fn default_implicit_conversion_space(
    (operand_space, operand_type): (ast::StateSpace, &ast::Type),
    (instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
) -> Result<Option<ConversionKind>, TranslateError> {
    // Coercion to or from the generic space is a pointer-to-pointer cast.
    if (instruction_space == ast::StateSpace::Generic && coerces_to_generic(operand_space))
        || (operand_space == ast::StateSpace::Generic && coerces_to_generic(instruction_space))
    {
        Ok(Some(ConversionKind::PtrToPtr))
    } else if operand_space == ast::StateSpace::Reg {
        match operand_type {
            // Register holding a pointer into the instruction's space: cast
            // only if the pointee type differs.
            ast::Type::Pointer(operand_ptr_type, operand_ptr_space)
                if *operand_ptr_space == instruction_space =>
            {
                if instruction_type != &ast::Type::Scalar(*operand_ptr_type) {
                    Ok(Some(ConversionKind::PtrToPtr))
                } else {
                    Ok(None)
                }
            }
            // 64-bit integer register used as an address.
            // TODO: 32 bit
            ast::Type::Scalar(ast::ScalarType::B64)
            | ast::Type::Scalar(ast::ScalarType::U64)
            | ast::Type::Scalar(ast::ScalarType::S64) => match instruction_space {
                ast::StateSpace::Global
                | ast::StateSpace::Generic
                | ast::StateSpace::Const
                | ast::StateSpace::Local
                | ast::StateSpace::Shared => Ok(Some(ConversionKind::BitToPtr)),
                _ => Err(error_mismatched_type()),
            },
            // 32-bit integer register: only valid for the window-sized spaces.
            ast::Type::Scalar(ast::ScalarType::B32)
            | ast::Type::Scalar(ast::ScalarType::U32)
            | ast::Type::Scalar(ast::ScalarType::S32) => match instruction_space {
                ast::StateSpace::Const | ast::StateSpace::Local | ast::StateSpace::Shared => {
                    Ok(Some(ConversionKind::BitToPtr))
                }
                _ => Err(error_mismatched_type()),
            },
            _ => Err(error_mismatched_type()),
        }
    } else if instruction_space == ast::StateSpace::Reg {
        match instruction_type {
            // Instruction expects a register pointer into the operand's space.
            ast::Type::Pointer(instruction_ptr_type, instruction_ptr_space)
                if operand_space == *instruction_ptr_space =>
            {
                if operand_type != &ast::Type::Scalar(*instruction_ptr_type) {
                    Ok(Some(ConversionKind::PtrToPtr))
                } else {
                    Ok(None)
                }
            }
            _ => Err(error_mismatched_type()),
        }
    } else {
        // Two distinct non-reg spaces with no generic coercion: illegal.
        Err(error_mismatched_type())
    }
}
// Space is same, but type is different
fn default_implicit_conversion_type(
space: ast::StateSpace,
operand_type: &ast::Type,
instruction_type: &ast::Type,
) -> Result<Option<ConversionKind>, TranslateError> {
if space == ast::StateSpace::Reg {
if should_bitcast(instruction_type, operand_type) {
Ok(Some(ConversionKind::Default))
} else {
Err(TranslateError::MismatchedType)
}
} else {
Ok(Some(ConversionKind::PtrToPtr))
}
}
/// True when a pointer in this state space can be coerced to/from the generic
/// address space.
fn coerces_to_generic(this: ast::StateSpace) -> bool {
    match this {
        ast::StateSpace::Global
        | ast::StateSpace::Const
        | ast::StateSpace::Local
        // Was written `ptx_parser::StateSpace::SharedCta`; `ast` is the same
        // crate aliased, so use the module's uniform `ast::` style.
        | ast::StateSpace::SharedCta
        | ast::StateSpace::SharedCluster
        | ast::StateSpace::Shared => true,
        ast::StateSpace::Reg
        | ast::StateSpace::Param
        | ast::StateSpace::ParamEntry
        | ast::StateSpace::ParamFunc
        | ast::StateSpace::Generic => false,
    }
}
/// Decides whether an operand type may be implicitly bitcast to the
/// instruction type: sizes must match, and the kind pairing must be one of
/// the combinations PTX allows (bit<->typed, signed<->unsigned).
fn should_bitcast(instr: &ast::Type, operand: &ast::Type) -> bool {
    match (instr, operand) {
        (ast::Type::Scalar(inst), ast::Type::Scalar(operand)) => {
            // A bitcast never changes width.
            if inst.size_of() != operand.size_of() {
                return false;
            }
            let operand_kind = operand.kind();
            match inst.kind() {
                ast::ScalarKind::Bit => operand_kind != ast::ScalarKind::Bit,
                ast::ScalarKind::Float => operand_kind == ast::ScalarKind::Bit,
                ast::ScalarKind::Signed => matches!(
                    operand_kind,
                    ast::ScalarKind::Bit | ast::ScalarKind::Unsigned
                ),
                ast::ScalarKind::Unsigned => matches!(
                    operand_kind,
                    ast::ScalarKind::Bit | ast::ScalarKind::Signed
                ),
                // Predicates never bitcast.
                ast::ScalarKind::Pred => false,
            }
        }
        // Vectors and arrays follow their element types.
        (ast::Type::Vector(_, inst), ast::Type::Vector(_, operand))
        | (ast::Type::Array(_, inst, _), ast::Type::Array(_, operand, _)) => {
            should_bitcast(&ast::Type::Scalar(*inst), &ast::Type::Scalar(*operand))
        }
        _ => false,
    }
}
pub(crate) fn should_convert_relaxed_dst_wrapper(
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
) -> Result<Option<ConversionKind>, TranslateError> {
if operand_space != instruction_space {
return Err(TranslateError::MismatchedType);
}
if operand_type == instruction_type {
return Ok(None);
}
match should_convert_relaxed_dst(operand_type, instruction_type) {
conv @ Some(_) => Ok(conv),
None => Err(TranslateError::MismatchedType),
}
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operand-size-exceeding-instruction-type-size__relaxed-type-checking-rules-destination-operands
/// Relaxed-checking table for destination operands: given the destination's
/// declared type and the instruction type, returns the conversion that makes
/// the pair legal, or `None` when the combination is disallowed.
fn should_convert_relaxed_dst(
    dst_type: &ast::Type,
    instr_type: &ast::Type,
) -> Option<ConversionKind> {
    if dst_type == instr_type {
        return None;
    }
    match (dst_type, instr_type) {
        (ast::Type::Scalar(dst_type), ast::Type::Scalar(instr_type)) => match instr_type.kind() {
            // Bit-typed result may widen into any same-or-larger destination.
            ast::ScalarKind::Bit => {
                if instr_type.size_of() <= dst_type.size_of() {
                    Some(ConversionKind::Default)
                } else {
                    None
                }
            }
            // Signed result: equal width bitcasts, narrower sign-extends;
            // never into a float destination.
            ast::ScalarKind::Signed => {
                if dst_type.kind() != ast::ScalarKind::Float {
                    if instr_type.size_of() == dst_type.size_of() {
                        Some(ConversionKind::Default)
                    } else if instr_type.size_of() < dst_type.size_of() {
                        Some(ConversionKind::SignExtend)
                    } else {
                        None
                    }
                } else {
                    None
                }
            }
            // Unsigned result widens into any non-float destination.
            ast::ScalarKind::Unsigned => {
                if instr_type.size_of() <= dst_type.size_of()
                    && dst_type.kind() != ast::ScalarKind::Float
                {
                    Some(ConversionKind::Default)
                } else {
                    None
                }
            }
            // Float result may only land in a bit-typed destination.
            ast::ScalarKind::Float => {
                if instr_type.size_of() <= dst_type.size_of()
                    && dst_type.kind() == ast::ScalarKind::Bit
                {
                    Some(ConversionKind::Default)
                } else {
                    None
                }
            }
            // Predicates get no relaxed conversions.
            ast::ScalarKind::Pred => None,
        },
        // Vectors and arrays follow their element types.
        (ast::Type::Vector(_, dst_type), ast::Type::Vector(_, instr_type))
        | (ast::Type::Array(_, dst_type, _), ast::Type::Array(_, instr_type, _)) => {
            should_convert_relaxed_dst(
                &ast::Type::Scalar(*dst_type),
                &ast::Type::Scalar(*instr_type),
            )
        }
        _ => None,
    }
}
pub(crate) fn should_convert_relaxed_src_wrapper(
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
) -> Result<Option<ConversionKind>, TranslateError> {
if operand_space != instruction_space {
return Err(error_mismatched_type());
}
if operand_type == instruction_type {
return Ok(None);
}
match should_convert_relaxed_src(operand_type, instruction_type) {
conv @ Some(_) => Ok(conv),
None => Err(error_mismatched_type()),
}
}
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operand-size-exceeding-instruction-type-size__relaxed-type-checking-rules-source-operands
/// Relaxed-checking table for source operands: given the source's declared
/// type and the instruction type, returns the conversion that makes the pair
/// legal, or `None` when the combination is disallowed.
fn should_convert_relaxed_src(
    src_type: &ast::Type,
    instr_type: &ast::Type,
) -> Option<ConversionKind> {
    if src_type == instr_type {
        return None;
    }
    match (src_type, instr_type) {
        (ast::Type::Scalar(src_type), ast::Type::Scalar(instr_type)) => match instr_type.kind() {
            // Bit-typed instruction may chop any same-or-larger source.
            ast::ScalarKind::Bit => {
                if instr_type.size_of() <= src_type.size_of() {
                    Some(ConversionKind::Default)
                } else {
                    None
                }
            }
            // Integer instruction accepts any same-or-larger non-float source.
            ast::ScalarKind::Signed | ast::ScalarKind::Unsigned => {
                if instr_type.size_of() <= src_type.size_of()
                    && src_type.kind() != ast::ScalarKind::Float
                {
                    Some(ConversionKind::Default)
                } else {
                    None
                }
            }
            // Float instruction only accepts a bit-typed source.
            ast::ScalarKind::Float => {
                if instr_type.size_of() <= src_type.size_of()
                    && src_type.kind() == ast::ScalarKind::Bit
                {
                    Some(ConversionKind::Default)
                } else {
                    None
                }
            }
            // Predicates get no relaxed conversions.
            ast::ScalarKind::Pred => None,
        },
        // Vectors and arrays follow their element types. (Bindings renamed
        // from the misleading `dst_type` — copy-pasted from the dst variant —
        // to `src_type`; behavior unchanged.)
        (ast::Type::Vector(_, src_type), ast::Type::Vector(_, instr_type))
        | (ast::Type::Array(_, src_type, _), ast::Type::Array(_, instr_type, _)) => {
            should_convert_relaxed_src(
                &ast::Type::Scalar(*src_type),
                &ast::Type::Scalar(*instr_type),
            )
        }
        _ => None,
    }
}

View File

@ -1,275 +0,0 @@
use super::*;
use ptx_parser as ast;
/*
How do we handle arguments:
- input .params in kernels
.param .b64 in_arg
get turned into this SPIR-V:
%1 = OpFunctionParameter %ulong
%2 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %1
We do this for two reasons. One, common treatment for argument-declared
.param variables and .param variables inside function (we assume that
at SPIR-V level every .param is a pointer in Function storage class)
- input .params in functions
.param .b64 in_arg
get turned into this SPIR-V:
%1 = OpFunctionParameter %_ptr_Function_ulong
- input .regs
.reg .b64 in_arg
get turned into the same SPIR-V as kernel .params:
%1 = OpFunctionParameter %ulong
%2 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %1
- output .regs
.reg .b64 out_arg
get just a variable declaration:
%2 = OpVariable %%_ptr_Function_ulong Function
- output .params don't exist, they have been moved to input positions
by an earlier pass
Distinguishing between kernel .params and function .params is not the
cleanest solution. Alternatively, we could "deparamize" all kernel .param
arguments by turning them into .reg arguments like this:
.param .b64 arg -> .reg ptr<.b64,.param> arg
This has the massive downside that this transformation would have to run
very early and would muddy up already difficult code. It's simpler to just
have an if here
*/
/// Mem-SSA pass: rewrites register-typed identifiers into explicit variable
/// loads/stores, turns input arguments into stored locals, and lowers `ret`
/// into a load of the return register followed by `RetValue`.
pub(super) fn run<'a, 'b>(
    func: Vec<TypedStatement>,
    id_def: &mut NumericIdResolver,
    fn_decl: &'a mut ast::MethodDeclaration<'b, SpirvWord>,
) -> Result<Vec<TypedStatement>, TranslateError> {
    let mut result = Vec::with_capacity(func.len());
    // Input arguments: each becomes a local variable initialized from the
    // incoming parameter (kernels and functions are handled differently).
    for arg in fn_decl.input_arguments.iter_mut() {
        insert_mem_ssa_argument(
            id_def,
            &mut result,
            arg,
            matches!(fn_decl.name, ast::MethodName::Kernel(_)),
        );
    }
    // Return arguments just get a variable declaration up front.
    for arg in fn_decl.return_arguments.iter() {
        insert_mem_ssa_argument_reg_return(&mut result, arg);
    }
    for s in func {
        match s {
            Statement::Instruction(inst) => match inst {
                ast::Instruction::Ret { data } => {
                    // TODO: handle multiple output args
                    match &fn_decl.return_arguments[..] {
                        // Single return value: load it into a fresh register
                        // and return that register.
                        [return_reg] => {
                            let new_id = id_def.register_intermediate(Some((
                                return_reg.v_type.clone(),
                                ast::StateSpace::Reg,
                            )));
                            result.push(Statement::LoadVar(LoadVarDetails {
                                arg: ast::LdArgs {
                                    dst: new_id,
                                    src: return_reg.name,
                                },
                                typ: return_reg.v_type.clone(),
                                member_index: None,
                            }));
                            result.push(Statement::RetValue(data, new_id));
                        }
                        // No return value: plain ret passes through.
                        [] => result.push(Statement::Instruction(ast::Instruction::Ret { data })),
                        _ => unimplemented!(),
                    }
                }
                // All other instructions get the generic operand rewrite.
                inst => insert_mem_ssa_statement_default(
                    id_def,
                    &mut result,
                    Statement::Instruction(inst),
                )?,
            },
            Statement::Conditional(bra) => {
                insert_mem_ssa_statement_default(id_def, &mut result, Statement::Conditional(bra))?
            }
            Statement::Conversion(conv) => {
                insert_mem_ssa_statement_default(id_def, &mut result, Statement::Conversion(conv))?
            }
            Statement::PtrAccess(ptr_access) => insert_mem_ssa_statement_default(
                id_def,
                &mut result,
                Statement::PtrAccess(ptr_access),
            )?,
            Statement::RepackVector(repack) => insert_mem_ssa_statement_default(
                id_def,
                &mut result,
                Statement::RepackVector(repack),
            )?,
            Statement::FunctionPointer(func_ptr) => insert_mem_ssa_statement_default(
                id_def,
                &mut result,
                Statement::FunctionPointer(func_ptr),
            )?,
            // These carry no register operands to rewrite.
            s @ Statement::Variable(_) | s @ Statement::Label(_) | s @ Statement::Constant(..) => {
                result.push(s)
            }
            // Statement kinds that must not exist at this stage of lowering.
            _ => return Err(error_unreachable()),
        }
    }
    Ok(result)
}
/// Turns one input argument into a register variable plus an initializing
/// store from the incoming parameter value, then renames the argument to the
/// fresh id. Function (non-kernel) `.param` arguments are left untouched.
fn insert_mem_ssa_argument(
    id_def: &mut NumericIdResolver,
    func: &mut Vec<TypedStatement>,
    arg: &mut ast::Variable<SpirvWord>,
    is_kernel: bool,
) {
    // Function .param arguments stay as-is (kernels are handled below).
    if !is_kernel && arg.state_space == ast::StateSpace::Param {
        return;
    }
    // Fresh id that will carry the raw incoming parameter value.
    let new_id = id_def.register_intermediate(Some((arg.v_type.clone(), arg.state_space)));
    // Declare the original name as a register variable...
    func.push(Statement::Variable(ast::Variable {
        align: arg.align,
        v_type: arg.v_type.clone(),
        state_space: ast::StateSpace::Reg,
        name: arg.name,
        array_init: Vec::new(),
    }));
    // ...and initialize it from the incoming value.
    func.push(Statement::StoreVar(StoreVarDetails {
        arg: ast::StArgs {
            src1: arg.name,
            src2: new_id,
        },
        typ: arg.v_type.clone(),
        member_index: None,
    }));
    arg.name = new_id;
}
/// Emits a variable declaration for a `.reg` return argument so later code
/// can store into it before `ret`.
fn insert_mem_ssa_argument_reg_return(
    func: &mut Vec<TypedStatement>,
    arg: &ast::Variable<SpirvWord>,
) {
    let declaration = ast::Variable {
        align: arg.align,
        v_type: arg.v_type.clone(),
        state_space: arg.state_space,
        name: arg.name,
        array_init: arg.array_init.clone(),
    };
    func.push(Statement::Variable(declaration));
}
/// Runs the mem-SSA operand rewrite over a single statement: loads are
/// emitted before it, the rewritten statement is pushed, and the stores for
/// its destinations follow.
fn insert_mem_ssa_statement_default<'a, 'input>(
    id_def: &'a mut NumericIdResolver<'input>,
    func: &'a mut Vec<TypedStatement>,
    stmt: TypedStatement,
) -> Result<(), TranslateError> {
    let mut visitor = InsertMemSSAVisitor {
        id_def,
        func,
        post_statements: Vec::new(),
    };
    // Visiting pushes LoadVar statements into `func` as a side effect.
    let new_stmt = stmt.visit_map(&mut visitor)?;
    visitor.func.push(new_stmt);
    // StoreVar statements for destination operands run after the statement.
    visitor.func.extend(visitor.post_statements);
    Ok(())
}
/// Visitor that replaces register-variable operands with fresh intermediates,
/// emitting the matching loads before and stores after the statement.
struct InsertMemSSAVisitor<'a, 'input> {
    // Resolver used to query id types and mint intermediates.
    id_def: &'a mut NumericIdResolver<'input>,
    // Output list; LoadVar statements are pushed here while visiting.
    func: &'a mut Vec<TypedStatement>,
    // StoreVar statements that must run after the visited statement.
    post_statements: Vec<TypedStatement>,
}
impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
    /// Rewrites one identifier occurrence: register variables are replaced by
    /// a fresh intermediate with an accompanying LoadVar (sources) or a
    /// deferred StoreVar (destinations); anything else passes through.
    fn symbol(
        &mut self,
        symbol: SpirvWord,
        member_index: Option<u8>,
        expected: Option<(&ast::Type, ast::StateSpace)>,
        is_dst: bool,
    ) -> Result<SpirvWord, TranslateError> {
        // No type expectation means this occurrence is not an operand.
        if expected.is_none() {
            return Ok(symbol);
        };
        let (mut var_type, var_space, is_variable) = self.id_def.get_typed(symbol)?;
        // Only register-space variables participate in mem-SSA rewriting.
        if var_space != ast::StateSpace::Reg || !is_variable {
            return Ok(symbol);
        };
        let member_index = match member_index {
            Some(idx) => {
                // Vector member access: the load/store works on the scalar
                // element type.
                let vector_width = match var_type {
                    ast::Type::Vector(width, scalar_t) => {
                        var_type = ast::Type::Scalar(scalar_t);
                        width
                    }
                    _ => return Err(error_mismatched_type()),
                };
                Some((
                    idx,
                    // Special registers additionally record the full vector
                    // width alongside the component index.
                    if self.id_def.special_registers.get(symbol).is_some() {
                        Some(vector_width)
                    } else {
                        None
                    },
                ))
            }
            None => None,
        };
        let generated_id = self
            .id_def
            .register_intermediate(Some((var_type.clone(), ast::StateSpace::Reg)));
        if !is_dst {
            // Source operand: load the variable before the statement.
            self.func.push(Statement::LoadVar(LoadVarDetails {
                arg: ast::LdArgs {
                    dst: generated_id,
                    src: symbol,
                },
                typ: var_type,
                member_index,
            }));
        } else {
            // Destination operand: store back after the statement runs.
            self.post_statements
                .push(Statement::StoreVar(StoreVarDetails {
                    arg: ast::StArgs {
                        src1: symbol,
                        src2: generated_id,
                    },
                    typ: var_type,
                    member_index: member_index.map(|(idx, _)| idx),
                }));
        }
        Ok(generated_id)
    }
}
impl<'a, 'input> ast::VisitorMap<TypedOperand, TypedOperand, TranslateError>
    for InsertMemSSAVisitor<'a, 'input>
{
    /// Rewrites each operand through `symbol`; vector-member operands
    /// collapse to a plain register holding the extracted element.
    fn visit(
        &mut self,
        operand: TypedOperand,
        type_space: Option<(&ast::Type, ast::StateSpace)>,
        is_dst: bool,
        _relaxed_type_check: bool,
    ) -> Result<TypedOperand, TranslateError> {
        Ok(match operand {
            TypedOperand::Reg(reg) => {
                TypedOperand::Reg(self.symbol(reg, None, type_space, is_dst)?)
            }
            TypedOperand::RegOffset(reg, offset) => {
                TypedOperand::RegOffset(self.symbol(reg, None, type_space, is_dst)?, offset)
            }
            // Immediates carry no identifier to rewrite.
            op @ TypedOperand::Imm(..) => op,
            TypedOperand::VecMember(symbol, index) => {
                TypedOperand::Reg(self.symbol(symbol, Some(index), type_space, is_dst)?)
            }
        })
    }

    /// Rewrites a bare identifier the same way (no vector component).
    fn visit_ident(
        &mut self,
        args: SpirvWord,
        type_space: Option<(&ast::Type, ast::StateSpace)>,
        is_dst: bool,
        // Unused here, as in `visit` above; underscore-prefixed to silence
        // the unused-parameter warning the original triggered.
        _relaxed_type_check: bool,
    ) -> Result<SpirvWord, TranslateError> {
        self.symbol(args, None, type_space, is_dst)
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,80 +0,0 @@
use super::*;
use ptx_parser as ast;
/// Converts string identifiers into numeric ids for one function body.
/// Labels are pre-registered in a first pass so forward branches resolve.
pub(crate) fn run<'input, 'b>(
    id_defs: &mut FnStringIdResolver<'input, 'b>,
    fn_defs: &GlobalFnDeclResolver<'input, 'b>,
    func: Vec<ast::Statement<ast::ParsedOperand<&'input str>>>,
) -> Result<Vec<NormalizedStatement>, TranslateError> {
    // First pass: register every label before any statement references it.
    for statement in func.iter() {
        if let ast::Statement::Label(id) = statement {
            id_defs.add_def(*id, None, false);
        }
    }
    // Second pass: expand and renumber all statements.
    let mut result = Vec::new();
    for statement in func {
        expand_map_variables(id_defs, fn_defs, &mut result, statement)?;
    }
    Ok(result)
}
/// Expands a single parsed statement into zero or more normalized statements,
/// mapping every textual identifier to its numeric id along the way.
///
/// Blocks are flattened recursively (opening/closing a resolver scope around
/// their contents); multi-count variable declarations (`.reg .b32 r<4>;`) are
/// expanded into one `Statement::Variable` per generated id.
fn expand_map_variables<'a, 'b>(
    id_defs: &mut FnStringIdResolver<'a, 'b>,
    fn_defs: &GlobalFnDeclResolver<'a, 'b>,
    result: &mut Vec<NormalizedStatement>,
    s: ast::Statement<ast::ParsedOperand<&'a str>>,
) -> Result<(), TranslateError> {
    match s {
        ast::Statement::Block(statements) => {
            id_defs.start_block();
            for inner in statements {
                expand_map_variables(id_defs, fn_defs, result, inner)?;
            }
            id_defs.end_block();
        }
        ast::Statement::Label(name) => {
            result.push(Statement::Label(id_defs.get_id(name)?));
        }
        ast::Statement::Instruction(pred, inst) => {
            // Map the guard predicate (if any), then every identifier inside
            // the instruction itself.
            let pred = pred
                .map(|p| pred_map_variable(p, &mut |id| id_defs.get_id(id)))
                .transpose()?;
            let inst = ast::visit_map(
                inst,
                &mut |id, _: Option<(&ast::Type, ast::StateSpace)>, _: bool, _: bool| {
                    id_defs.get_id(id)
                },
            )?;
            result.push(Statement::Instruction((pred, inst)));
        }
        ast::Statement::Variable(var) => match var.count {
            // `name<count>` declarations: one fresh id (and one statement)
            // per repetition.
            Some(count) => {
                let declared_type = var.var.v_type.clone();
                for new_id in
                    id_defs.add_defs(var.var.name, count, declared_type, var.var.state_space, true)
                {
                    result.push(Statement::Variable(ast::Variable {
                        align: var.var.align,
                        v_type: var.var.v_type.clone(),
                        state_space: var.var.state_space,
                        name: new_id,
                        array_init: var.var.array_init.clone(),
                    }));
                }
            }
            // Plain single declaration.
            None => {
                let new_id = id_defs.add_def(
                    var.var.name,
                    Some((var.var.v_type.clone(), var.var.state_space)),
                    true,
                );
                result.push(Statement::Variable(ast::Variable {
                    align: var.var.align,
                    v_type: var.var.v_type,
                    state_space: var.var.state_space,
                    name: new_id,
                    array_init: var.var.array_init,
                }));
            }
        },
    };
    Ok(())
}

View File

@ -1,6 +1,5 @@
use super::*; use super::*;
use ptx_parser as ast; use ptx_parser as ast;
use rustc_hash::FxHashMap;
pub(crate) fn run<'input, 'b>( pub(crate) fn run<'input, 'b>(
resolver: &mut ScopedResolver<'input, 'b>, resolver: &mut ScopedResolver<'input, 'b>,
@ -37,7 +36,7 @@ fn run_method<'input, 'b>(
let name = match method.func_directive.name { let name = match method.func_directive.name {
ast::MethodName::Kernel(name) => ast::MethodName::Kernel(name), ast::MethodName::Kernel(name) => ast::MethodName::Kernel(name),
ast::MethodName::Func(text) => { ast::MethodName::Func(text) => {
ast::MethodName::Func(resolver.add(Cow::Borrowed(text), None)?) ast::MethodName::Func(resolver.add_or_get_in_current_scope_untyped(text)?)
} }
}; };
resolver.start_scope(); resolver.start_scope();

View File

@ -1,49 +0,0 @@
use std::{collections::HashSet, iter};
use super::*;
/// Drops every label no branch targets, then prepends a fresh entry label.
///
/// A label survives only if it is the target of a `bra` instruction or one of
/// the arms of a `Statement::Conditional`.
pub(super) fn run(
    func: Vec<ExpandedStatement>,
    id_def: &mut NumericIdResolver,
) -> Vec<ExpandedStatement> {
    // First pass: collect every label that is actually jumped to.
    let mut referenced_labels = HashSet::new();
    for statement in func.iter() {
        match statement {
            Statement::Instruction(inst) => {
                // `jump_target` yields `Some` only for `bra`.
                referenced_labels.extend(jump_target(inst));
            }
            Statement::Conditional(branch) => {
                referenced_labels.insert(branch.if_true);
                referenced_labels.insert(branch.if_false);
            }
            Statement::Variable(..)
            | Statement::LoadVar(..)
            | Statement::StoreVar(..)
            | Statement::RetValue(..)
            | Statement::Conversion(..)
            | Statement::Constant(..)
            | Statement::Label(..)
            | Statement::PtrAccess { .. }
            | Statement::VectorAccess { .. }
            | Statement::RepackVector(..)
            | Statement::FunctionPointer(..) => {}
        }
    }
    // Second pass: emit a synthetic entry label followed by the original
    // statements, filtering out labels nobody references.
    let mut result = Vec::with_capacity(func.len() + 1);
    result.push(Statement::Label(id_def.register_intermediate(None)));
    result.extend(func.into_iter().filter(|statement| match statement {
        Statement::Label(label) => referenced_labels.contains(label),
        _ => true,
    }));
    result
}
/// Returns the branch target of an unconditional `bra`; `None` for every
/// other instruction.
fn jump_target<T: ast::Operand<Ident = SpirvWord>>(
    this: &ast::Instruction<T>,
) -> Option<SpirvWord> {
    if let ast::Instruction::Bra { arguments } = this {
        Some(arguments.src)
    } else {
        None
    }
}

View File

@ -1,44 +0,0 @@
use super::*;
use ptx_parser as ast;
/// Lowers predicated statements (`@p inst` / `@!p inst`) into explicit
/// conditional branches plus plain statements.
///
/// For a predicated non-branch instruction this emits:
///   Conditional(p, if_true, if_false); Label(if_true); inst; Label(if_false)
/// For a predicated `bra` the branch is folded directly into the conditional
/// (no intermediate `if_true` block is emitted). `@!p` is handled by swapping
/// the two targets.
///
/// # Errors
/// Returns `error_unreachable()` if a `Statement::Block` survives to this
/// pass — blocks are flattened when resolving ids.
pub(crate) fn run(
    func: Vec<NormalizedStatement>,
    id_def: &mut NumericIdResolver,
) -> Result<Vec<UnconditionalStatement>, TranslateError> {
    let mut result = Vec::with_capacity(func.len());
    for s in func {
        match s {
            Statement::Label(id) => result.push(Statement::Label(id)),
            Statement::Instruction((pred, inst)) => {
                if let Some(pred) = pred {
                    // NOTE: both ids are always allocated, even though
                    // `if_true` goes unused when the `bra` is folded below —
                    // keeps id allocation order deterministic.
                    let if_true = id_def.register_intermediate(None);
                    let if_false = id_def.register_intermediate(None);
                    // A predicated `bra` can jump straight to its own target.
                    let folded_bra = match &inst {
                        ast::Instruction::Bra { arguments, .. } => Some(arguments.src),
                        _ => None,
                    };
                    let mut branch = BrachCondition {
                        predicate: pred.label,
                        if_true: folded_bra.unwrap_or(if_true),
                        if_false,
                    };
                    // `@!p`: negated predicate — swap the targets instead of
                    // materializing a logical not.
                    if pred.not {
                        std::mem::swap(&mut branch.if_true, &mut branch.if_false);
                    }
                    result.push(Statement::Conditional(branch));
                    if folded_bra.is_none() {
                        result.push(Statement::Label(if_true));
                        result.push(Statement::Instruction(inst));
                    }
                    result.push(Statement::Label(if_false));
                } else {
                    result.push(Statement::Instruction(inst));
                }
            }
            Statement::Variable(var) => result.push(Statement::Variable(var)),
            // Blocks are flattened when resolving ids
            _ => return Err(error_unreachable()),
        }
    }
    Ok(result)
}

View File

@ -0,0 +1,187 @@
use super::*;
/// Replaces selected instructions in every method body with calls to
/// `__zluda_ptx_impl_*` helper functions, then prepends an `EXTERN`
/// declaration for each distinct helper that was referenced.
pub(super) fn run<'input>(
    resolver: &mut GlobalStringIdentResolver2<'input>,
    directives: Vec<Directive2<'input, ast::Instruction<SpirvWord>, SpirvWord>>,
) -> Result<Vec<Directive2<'input, ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
    // Rewrite all directives first, recording which helpers got used.
    let mut fn_declarations = FxHashMap::default();
    let mut remapped = Vec::with_capacity(directives.len());
    for directive in directives {
        remapped.push(run_directive(resolver, &mut fn_declarations, directive)?);
    }
    // Emit one body-less extern declaration per referenced helper, followed
    // by the rewritten directives.
    let mut result = Vec::new();
    for (_, (return_arguments, name, input_arguments)) in fn_declarations {
        result.push(Directive2::Method(Function2 {
            func_decl: ast::MethodDeclaration {
                return_arguments,
                name: ast::MethodName::Func(name),
                input_arguments,
                shared_mem: None,
            },
            globals: Vec::new(),
            body: None,
            import_as: None,
            tuning: Vec::new(),
            linkage: ast::LinkingDirective::EXTERN,
        }));
    }
    result.extend(remapped);
    Ok(result)
}
/// Applies the instruction-to-call rewrite to one directive. Variables pass
/// through untouched; a method has its body (when present) rewritten.
fn run_directive<'input>(
    resolver: &mut GlobalStringIdentResolver2<'input>,
    fn_declarations: &mut FxHashMap<
        Cow<'input, str>,
        (
            Vec<ast::Variable<SpirvWord>>,
            SpirvWord,
            Vec<ast::Variable<SpirvWord>>,
        ),
    >,
    directive: Directive2<'input, ast::Instruction<SpirvWord>, SpirvWord>,
) -> Result<Directive2<'input, ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
    match directive {
        variable @ Directive2::Variable(..) => Ok(variable),
        Directive2::Method(mut method) => {
            method.body = match method.body {
                Some(statements) => Some(run_statements(resolver, fn_declarations, statements)?),
                None => None,
            };
            Ok(Directive2::Method(method))
        }
    }
}
/// Applies the instruction-to-call rewrite to every instruction statement in
/// a method body; all other statement kinds pass through unchanged.
fn run_statements<'input>(
    resolver: &mut GlobalStringIdentResolver2<'input>,
    fn_declarations: &mut FxHashMap<
        Cow<'input, str>,
        (
            Vec<ast::Variable<SpirvWord>>,
            SpirvWord,
            Vec<ast::Variable<SpirvWord>>,
        ),
    >,
    statements: Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
) -> Result<Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
    let mut result = Vec::with_capacity(statements.len());
    for statement in statements {
        let rewritten = match statement {
            Statement::Instruction(instruction) => {
                Statement::Instruction(run_instruction(resolver, fn_declarations, instruction)?)
            }
            other => other,
        };
        result.push(rewritten);
    }
    Ok(result)
}
/// Decides whether one instruction must be replaced by a helper-function call
/// and, if so, performs the replacement via `to_call`.
///
/// `activemask`, `bfe` and `bfi` have no direct lowering here; `bfe`/`bfi`
/// helpers are specialized by the instruction's scalar type.
fn run_instruction<'input>(
    resolver: &mut GlobalStringIdentResolver2<'input>,
    fn_declarations: &mut FxHashMap<
        Cow<'input, str>,
        (
            Vec<ast::Variable<SpirvWord>>,
            SpirvWord,
            Vec<ast::Variable<SpirvWord>>,
        ),
    >,
    instruction: ptx_parser::Instruction<SpirvWord>,
) -> Result<ptx_parser::Instruction<SpirvWord>, TranslateError> {
    // Inspect by reference first so the whole instruction can still be
    // handed to `to_call` afterwards.
    let helper_name: Option<Cow<'input, str>> = match &instruction {
        ptx_parser::Instruction::Activemask { .. } => Some("activemask".into()),
        ptx_parser::Instruction::Bfe { data, .. } => {
            Some(["bfe_", scalar_to_ptx_name(*data)].concat().into())
        }
        ptx_parser::Instruction::Bfi { data, .. } => {
            Some(["bfi_", scalar_to_ptx_name(*data)].concat().into())
        }
        _ => None,
    };
    Ok(match helper_name {
        Some(name) => to_call(resolver, fn_declarations, name, instruction)?,
        None => instruction,
    })
}
/// Converts instruction `i` into a call to the `__zluda_ptx_impl_<name>`
/// helper, registering the helper's declaration on first use.
///
/// The instruction's operands are split by direction: destinations become the
/// call's return arguments, sources its input arguments. Every operand must
/// carry type/space information or this returns `error_mismatched_type`.
///
/// NOTE(review): the declaration cached in `fn_declarations` is built from
/// the operand types of the *first* instruction lowered to a given helper —
/// presumably all instructions mapped to one helper name share a signature;
/// confirm against the callers in `run_instruction`.
fn to_call<'input>(
    resolver: &mut GlobalStringIdentResolver2<'input>,
    fn_declarations: &mut FxHashMap<
        Cow<'input, str>,
        (
            Vec<ast::Variable<SpirvWord>>,
            SpirvWord,
            Vec<ast::Variable<SpirvWord>>,
        ),
    >,
    name: Cow<'input, str>,
    i: ast::Instruction<SpirvWord>,
) -> Result<ptx_parser::Instruction<SpirvWord>, TranslateError> {
    // Partition the instruction's operands into outputs and inputs,
    // collecting both the (type, space) pairs and the operand identifiers.
    let mut data_return = Vec::new();
    let mut data_input = Vec::new();
    let mut arguments_return = Vec::new();
    let mut arguments_input = Vec::new();
    ast::visit(&i, &mut |name: &SpirvWord,
                         type_space: Option<(
        &ptx_parser::Type,
        ptx_parser::StateSpace,
    )>,
                         is_dst: bool,
                         _: bool| {
        let (type_, space) = type_space.ok_or_else(error_mismatched_type)?;
        if is_dst {
            data_return.push((type_.clone(), space));
            arguments_return.push(*name);
        } else {
            data_input.push((type_.clone(), space));
            arguments_input.push(*name);
        };
        Ok::<_, TranslateError>(())
    })?;
    // Reuse the helper's id if it was already declared; otherwise register
    // the prefixed name and synthesize its parameter lists. The helper id is
    // registered *before* the parameter variables, fixing id-allocation order.
    let fn_name = match fn_declarations.entry(name) {
        hash_map::Entry::Occupied(occupied_entry) => occupied_entry.get().1,
        hash_map::Entry::Vacant(vacant_entry) => {
            let name = vacant_entry.key().clone();
            let full_name = [ZLUDA_PTX_PREFIX, &*name].concat();
            let name = resolver.register_named(Cow::Owned(full_name.clone()), None);
            vacant_entry.insert((
                to_variables(resolver, &data_return),
                name,
                to_variables(resolver, &data_input),
            ));
            name
        }
    };
    // Emit the equivalent non-uniform call.
    Ok(ast::Instruction::Call {
        data: ptx_parser::CallDetails {
            uniform: false,
            return_arguments: data_return,
            input_arguments: data_input,
        },
        arguments: ptx_parser::CallArgs {
            return_arguments: arguments_return,
            func: fn_name,
            input_arguments: arguments_input,
        },
    })
}
/// Materializes a list of (type, state-space) pairs into freshly-named,
/// unaligned, uninitialized variables — used to build the parameter lists of
/// a synthesized helper-function declaration.
///
/// Fix: takes `&[(…)]` instead of `&Vec<(…)>` (clippy `ptr_arg`); existing
/// `&Vec` call sites keep working through deref coercion.
fn to_variables<'input>(
    resolver: &mut GlobalStringIdentResolver2<'input>,
    arguments: &[(ptx_parser::Type, ptx_parser::StateSpace)],
) -> Vec<ptx_parser::Variable<SpirvWord>> {
    arguments
        .iter()
        .map(|(type_, space)| ast::Variable {
            align: None,
            v_type: type_.clone(),
            state_space: *space,
            // Each parameter gets a fresh anonymous id carrying its type.
            name: resolver.register_unnamed(Some((type_.clone(), *space))),
            array_init: Vec::new(),
        })
        .collect()
}

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +1,15 @@
use super::ptx; use crate::pass::TranslateError;
use super::TranslateError; use ptx_parser as ast;
mod spirv_run; mod spirv_run;
fn parse_and_assert(s: &str) { fn parse_and_assert(ptx_text: &str) {
let mut errors = Vec::new(); ast::parse_module_checked(ptx_text).unwrap();
ptx::ModuleParser::new().parse(&mut errors, s).unwrap();
assert!(errors.len() == 0);
} }
fn compile_and_assert(s: &str) -> Result<(), TranslateError> { fn compile_and_assert(ptx_text: &str) -> Result<(), TranslateError> {
let mut errors = Vec::new(); let ast = ast::parse_module_checked(ptx_text).unwrap();
let ast = ptx::ModuleParser::new().parse(&mut errors, s).unwrap(); crate::to_llvm_module(ast)?;
crate::to_spirv_module(ast)?;
Ok(()) Ok(())
} }

View File

@ -1,45 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%18 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "activemask"
OpExecutionMode %1 ContractionOff
OpDecorate %15 LinkageAttributes "__zluda_ptx_impl__activemask" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%21 = OpTypeFunction %uint
%ulong = OpTypeInt 64 0
%23 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%15 = OpFunction %uint None %21
OpFunctionEnd
%1 = OpFunction %void None %23
%6 = OpFunctionParameter %ulong
%7 = OpFunctionParameter %ulong
%14 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_uint Function
OpStore %2 %6
OpStore %3 %7
%8 = OpLoad %ulong %3 Aligned 8
OpStore %4 %8
%9 = OpFunctionCall %uint %15
OpStore %5 %9
%10 = OpLoad %ulong %4
%11 = OpLoad %uint %5
%12 = OpConvertUToPtr %_ptr_Generic_uint %10
%13 = OpCopyObject %uint %11
OpStore %12 %13 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,47 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%23 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "add"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%26 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_1 = OpConstant %ulong 1
%1 = OpFunction %void None %26
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%21 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%19 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %19 Aligned 8
OpStore %6 %12
%15 = OpLoad %ulong %6
%14 = OpIAdd %ulong %15 %ulong_1
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%20 = OpConvertUToPtr %_ptr_Generic_ulong %16
OpStore %20 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,47 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%23 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "add_non_coherent"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%26 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%ulong_1 = OpConstant %ulong 1
%1 = OpFunction %void None %26
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%21 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%19 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %13
%12 = OpLoad %ulong %19 Aligned 8
OpStore %6 %12
%15 = OpLoad %ulong %6
%14 = OpIAdd %ulong %15 %ulong_1
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%20 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %16
OpStore %20 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,55 +0,0 @@
; SPIR-V
; Version: 1.3
; Generator: rspirv
; Bound: 29
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
%23 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "add_tuning"
OpExecutionMode %1 ContractionOff
; OpExecutionMode %1 MaxWorkgroupSizeINTEL 256 1 1
OpDecorate %1 LinkageAttributes "add_tuning" Export
%24 = OpTypeVoid
%25 = OpTypeInt 64 0
%26 = OpTypeFunction %24 %25 %25
%27 = OpTypePointer Function %25
%28 = OpTypePointer Generic %25
%18 = OpConstant %25 1
%1 = OpFunction %24 None %26
%8 = OpFunctionParameter %25
%9 = OpFunctionParameter %25
%21 = OpLabel
%2 = OpVariable %27 Function
%3 = OpVariable %27 Function
%4 = OpVariable %27 Function
%5 = OpVariable %27 Function
%6 = OpVariable %27 Function
%7 = OpVariable %27 Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %25 %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %25 %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %25 %4
%19 = OpConvertUToPtr %28 %13
%12 = OpLoad %25 %19 Aligned 8
OpStore %6 %12
%15 = OpLoad %25 %6
%14 = OpIAdd %25 %15 %18
OpStore %7 %14
%16 = OpLoad %25 %5
%17 = OpLoad %25 %7
%20 = OpConvertUToPtr %28 %16
OpStore %20 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,62 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%31 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "and"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%34 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %34
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%29 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_uint %13
%12 = OpLoad %uint %23 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_uint %15
%41 = OpBitcast %_ptr_Generic_uchar %24
%42 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %41 %ulong_4
%22 = OpBitcast %_ptr_Generic_uint %42
%14 = OpLoad %uint %22 Aligned 4
OpStore %7 %14
%17 = OpLoad %uint %6
%18 = OpLoad %uint %7
%26 = OpCopyObject %uint %17
%27 = OpCopyObject %uint %18
%25 = OpBitwiseAnd %uint %26 %27
%16 = OpCopyObject %uint %25
OpStore %6 %16
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %6
%28 = OpConvertUToPtr %_ptr_Generic_uint %19
OpStore %28 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,105 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%67 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %12 "assertfail"
OpDecorate %1 LinkageAttributes "__zluda_ptx_impl____assertfail" Import
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%73 = OpTypeFunction %void %_ptr_Function_ulong %_ptr_Function_ulong %_ptr_Function_uint %_ptr_Function_ulong %_ptr_Function_ulong
%74 = OpTypeFunction %void %ulong %ulong
%uint_0 = OpConstant %uint 0
%ulong_0 = OpConstant %ulong 0
%uchar = OpTypeInt 8 0
%_ptr_Function_uchar = OpTypePointer Function %uchar
%ulong_0_0 = OpConstant %ulong 0
%ulong_0_1 = OpConstant %ulong 0
%ulong_0_2 = OpConstant %ulong 0
%ulong_0_3 = OpConstant %ulong 0
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_1 = OpConstant %ulong 1
%1 = OpFunction %void None %73
%61 = OpFunctionParameter %_ptr_Function_ulong
%62 = OpFunctionParameter %_ptr_Function_ulong
%63 = OpFunctionParameter %_ptr_Function_uint
%64 = OpFunctionParameter %_ptr_Function_ulong
%65 = OpFunctionParameter %_ptr_Function_ulong
OpFunctionEnd
%12 = OpFunction %void None %74
%25 = OpFunctionParameter %ulong
%26 = OpFunctionParameter %ulong
%60 = OpLabel
%13 = OpVariable %_ptr_Function_ulong Function
%14 = OpVariable %_ptr_Function_ulong Function
%15 = OpVariable %_ptr_Function_ulong Function
%16 = OpVariable %_ptr_Function_ulong Function
%17 = OpVariable %_ptr_Function_ulong Function
%18 = OpVariable %_ptr_Function_ulong Function
%19 = OpVariable %_ptr_Function_uint Function
%20 = OpVariable %_ptr_Function_ulong Function
%21 = OpVariable %_ptr_Function_ulong Function
%22 = OpVariable %_ptr_Function_uint Function
%23 = OpVariable %_ptr_Function_ulong Function
%24 = OpVariable %_ptr_Function_ulong Function
OpStore %13 %25
OpStore %14 %26
%27 = OpLoad %ulong %13 Aligned 8
OpStore %15 %27
%28 = OpLoad %ulong %14 Aligned 8
OpStore %16 %28
%53 = OpCopyObject %uint %uint_0
%29 = OpCopyObject %uint %53
OpStore %19 %29
%30 = OpLoad %ulong %15
%77 = OpBitcast %_ptr_Function_uchar %20
%78 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %77 %ulong_0
%43 = OpBitcast %_ptr_Function_ulong %78
%54 = OpCopyObject %ulong %30
OpStore %43 %54 Aligned 8
%31 = OpLoad %ulong %15
%79 = OpBitcast %_ptr_Function_uchar %21
%80 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %79 %ulong_0_0
%45 = OpBitcast %_ptr_Function_ulong %80
%55 = OpCopyObject %ulong %31
OpStore %45 %55 Aligned 8
%32 = OpLoad %uint %19
%81 = OpBitcast %_ptr_Function_uchar %22
%82 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %81 %ulong_0_1
%47 = OpBitcast %_ptr_Function_uint %82
OpStore %47 %32 Aligned 4
%33 = OpLoad %ulong %15
%83 = OpBitcast %_ptr_Function_uchar %23
%84 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %83 %ulong_0_2
%49 = OpBitcast %_ptr_Function_ulong %84
%56 = OpCopyObject %ulong %33
OpStore %49 %56 Aligned 8
%34 = OpLoad %ulong %15
%85 = OpBitcast %_ptr_Function_uchar %24
%86 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %85 %ulong_0_3
%51 = OpBitcast %_ptr_Function_ulong %86
%57 = OpCopyObject %ulong %34
OpStore %51 %57 Aligned 8
%87 = OpFunctionCall %void %1 %20 %21 %22 %23 %24
%36 = OpLoad %ulong %15
%58 = OpConvertUToPtr %_ptr_Generic_ulong %36
%35 = OpLoad %ulong %58 Aligned 8
OpStore %17 %35
%38 = OpLoad %ulong %17
%37 = OpIAdd %ulong %38 %ulong_1
OpStore %18 %37
%39 = OpLoad %ulong %16
%40 = OpLoad %ulong %18
%59 = OpConvertUToPtr %_ptr_Generic_ulong %39
OpStore %59 %40 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,85 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%38 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "atom_add" %4
OpExecutionMode %1 ContractionOff
OpDecorate %4 Alignment 4
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%uchar = OpTypeInt 8 0
%uint_1024 = OpConstant %uint 1024
%_arr_uchar_uint_1024 = OpTypeArray %uchar %uint_1024
%_ptr_Workgroup__arr_uchar_uint_1024 = OpTypePointer Workgroup %_arr_uchar_uint_1024
%4 = OpVariable %_ptr_Workgroup__arr_uchar_uint_1024 Workgroup
%ulong = OpTypeInt 64 0
%46 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
%uint_1 = OpConstant %uint 1
%uint_0 = OpConstant %uint 0
%ulong_4_0 = OpConstant %ulong 4
%1 = OpFunction %void None %46
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%36 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %5 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %6 %12
%14 = OpLoad %ulong %5
%29 = OpConvertUToPtr %_ptr_Generic_uint %14
%13 = OpLoad %uint %29 Aligned 4
OpStore %7 %13
%16 = OpLoad %ulong %5
%30 = OpConvertUToPtr %_ptr_Generic_uint %16
%51 = OpBitcast %_ptr_Generic_uchar %30
%52 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %51 %ulong_4
%26 = OpBitcast %_ptr_Generic_uint %52
%15 = OpLoad %uint %26 Aligned 4
OpStore %8 %15
%17 = OpLoad %uint %7
%31 = OpBitcast %_ptr_Workgroup_uint %4
OpStore %31 %17 Aligned 4
%19 = OpLoad %uint %8
%32 = OpBitcast %_ptr_Workgroup_uint %4
%18 = OpAtomicIAdd %uint %32 %uint_1 %uint_0 %19
OpStore %7 %18
%33 = OpBitcast %_ptr_Workgroup_uint %4
%20 = OpLoad %uint %33 Aligned 4
OpStore %8 %20
%21 = OpLoad %ulong %6
%22 = OpLoad %uint %7
%34 = OpConvertUToPtr %_ptr_Generic_uint %21
OpStore %34 %22 Aligned 4
%23 = OpLoad %ulong %6
%24 = OpLoad %uint %8
%35 = OpConvertUToPtr %_ptr_Generic_uint %23
%56 = OpBitcast %_ptr_Generic_uchar %35
%57 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %56 %ulong_4_0
%28 = OpBitcast %_ptr_Generic_uint %57
OpStore %28 %24 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,90 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%42 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "atom_add_float" %4
OpExecutionMode %1 ContractionOff
OpDecorate %37 LinkageAttributes "__zluda_ptx_impl__atom_relaxed_gpu_shared_add_f32" Import
OpDecorate %4 Alignment 4
%void = OpTypeVoid
%float = OpTypeFloat 32
%_ptr_Workgroup_float = OpTypePointer Workgroup %float
%46 = OpTypeFunction %float %_ptr_Workgroup_float %float
%uint = OpTypeInt 32 0
%uchar = OpTypeInt 8 0
%uint_1024 = OpConstant %uint 1024
%_arr_uchar_uint_1024 = OpTypeArray %uchar %uint_1024
%_ptr_Workgroup__arr_uchar_uint_1024 = OpTypePointer Workgroup %_arr_uchar_uint_1024
%4 = OpVariable %_ptr_Workgroup__arr_uchar_uint_1024 Workgroup
%ulong = OpTypeInt 64 0
%53 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_4_0 = OpConstant %ulong 4
%37 = OpFunction %float None %46
%39 = OpFunctionParameter %_ptr_Workgroup_float
%40 = OpFunctionParameter %float
OpFunctionEnd
%1 = OpFunction %void None %53
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%36 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_float Function
%8 = OpVariable %_ptr_Function_float Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %5 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %6 %12
%14 = OpLoad %ulong %5
%29 = OpConvertUToPtr %_ptr_Generic_float %14
%13 = OpLoad %float %29 Aligned 4
OpStore %7 %13
%16 = OpLoad %ulong %5
%30 = OpConvertUToPtr %_ptr_Generic_float %16
%58 = OpBitcast %_ptr_Generic_uchar %30
%59 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %58 %ulong_4
%26 = OpBitcast %_ptr_Generic_float %59
%15 = OpLoad %float %26 Aligned 4
OpStore %8 %15
%17 = OpLoad %float %7
%31 = OpBitcast %_ptr_Workgroup_float %4
OpStore %31 %17 Aligned 4
%19 = OpLoad %float %8
%32 = OpBitcast %_ptr_Workgroup_float %4
%18 = OpFunctionCall %float %37 %32 %19
OpStore %7 %18
%33 = OpBitcast %_ptr_Workgroup_float %4
%20 = OpLoad %float %33 Aligned 4
OpStore %8 %20
%21 = OpLoad %ulong %6
%22 = OpLoad %float %7
%34 = OpConvertUToPtr %_ptr_Generic_float %21
OpStore %34 %22 Aligned 4
%23 = OpLoad %ulong %6
%24 = OpLoad %float %8
%35 = OpConvertUToPtr %_ptr_Generic_float %23
%60 = OpBitcast %_ptr_Generic_uchar %35
%61 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %60 %ulong_4_0
%28 = OpBitcast %_ptr_Generic_float %61
OpStore %28 %24 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,77 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%39 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "atom_cas"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%42 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%uint_100 = OpConstant %uint 100
%uint_1 = OpConstant %uint 1
%uint_0 = OpConstant %uint 0
%ulong_4_0 = OpConstant %ulong 4
%ulong_4_1 = OpConstant %ulong 4
%1 = OpFunction %void None %42
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%37 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%30 = OpConvertUToPtr %_ptr_Generic_uint %13
%12 = OpLoad %uint %30 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%16 = OpLoad %uint %6
%31 = OpConvertUToPtr %_ptr_Generic_uint %15
%49 = OpBitcast %_ptr_Generic_uchar %31
%50 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %49 %ulong_4
%24 = OpBitcast %_ptr_Generic_uint %50
%33 = OpCopyObject %uint %16
%32 = OpAtomicCompareExchange %uint %24 %uint_1 %uint_0 %uint_0 %uint_100 %33
%14 = OpCopyObject %uint %32
OpStore %6 %14
%18 = OpLoad %ulong %4
%34 = OpConvertUToPtr %_ptr_Generic_uint %18
%53 = OpBitcast %_ptr_Generic_uchar %34
%54 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %53 %ulong_4_0
%27 = OpBitcast %_ptr_Generic_uint %54
%17 = OpLoad %uint %27 Aligned 4
OpStore %7 %17
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %6
%35 = OpConvertUToPtr %_ptr_Generic_uint %19
OpStore %35 %20 Aligned 4
%21 = OpLoad %ulong %5
%22 = OpLoad %uint %7
%36 = OpConvertUToPtr %_ptr_Generic_uint %21
%55 = OpBitcast %_ptr_Generic_uchar %36
%56 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %55 %ulong_4_1
%29 = OpBitcast %_ptr_Generic_uint %56
OpStore %29 %22 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,87 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%47 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "atom_inc"
OpDecorate %38 LinkageAttributes "__zluda_ptx_impl__atom_relaxed_gpu_generic_inc" Import
OpDecorate %42 LinkageAttributes "__zluda_ptx_impl__atom_relaxed_gpu_global_inc" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%_ptr_Generic_uint = OpTypePointer Generic %uint
%51 = OpTypeFunction %uint %_ptr_Generic_uint %uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%53 = OpTypeFunction %uint %_ptr_CrossWorkgroup_uint %uint
%ulong = OpTypeInt 64 0
%55 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%uint_101 = OpConstant %uint 101
%uint_101_0 = OpConstant %uint 101
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_8 = OpConstant %ulong 8
%38 = OpFunction %uint None %51
%40 = OpFunctionParameter %_ptr_Generic_uint
%41 = OpFunctionParameter %uint
OpFunctionEnd
%42 = OpFunction %uint None %53
%44 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
%45 = OpFunctionParameter %uint
OpFunctionEnd
%1 = OpFunction %void None %55
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%37 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
%14 = OpLoad %ulong %4
%31 = OpConvertUToPtr %_ptr_Generic_uint %14
%13 = OpFunctionCall %uint %38 %31 %uint_101
OpStore %6 %13
%16 = OpLoad %ulong %4
%32 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %16
%15 = OpFunctionCall %uint %42 %32 %uint_101_0
OpStore %7 %15
%18 = OpLoad %ulong %4
%33 = OpConvertUToPtr %_ptr_Generic_uint %18
%17 = OpLoad %uint %33 Aligned 4
OpStore %8 %17
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %6
%34 = OpConvertUToPtr %_ptr_Generic_uint %19
OpStore %34 %20 Aligned 4
%21 = OpLoad %ulong %5
%22 = OpLoad %uint %7
%35 = OpConvertUToPtr %_ptr_Generic_uint %21
%60 = OpBitcast %_ptr_Generic_uchar %35
%61 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %60 %ulong_4
%28 = OpBitcast %_ptr_Generic_uint %61
OpStore %28 %22 Aligned 4
%23 = OpLoad %ulong %5
%24 = OpLoad %uint %8
%36 = OpConvertUToPtr %_ptr_Generic_uint %23
%62 = OpBitcast %_ptr_Generic_uchar %36
%63 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %62 %ulong_8
%30 = OpBitcast %_ptr_Generic_uint %63
OpStore %30 %24 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,50 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "b64tof64"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%27 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%double = OpTypeFloat 64
%_ptr_Function_double = OpTypePointer Function %double
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %27
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%22 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_double Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%18 = OpBitcast %_ptr_Function_double %2
%10 = OpLoad %double %18 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %6 %11
%13 = OpLoad %double %4
%19 = OpBitcast %ulong %13
%12 = OpCopyObject %ulong %19
OpStore %5 %12
%15 = OpLoad %ulong %5
%20 = OpConvertUToPtr %_ptr_Generic_ulong %15
%14 = OpLoad %ulong %20 Aligned 8
OpStore %7 %14
%16 = OpLoad %ulong %6
%17 = OpLoad %ulong %7
%21 = OpConvertUToPtr %_ptr_Generic_ulong %16
OpStore %21 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,76 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%40 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "bfe"
OpDecorate %34 LinkageAttributes "__zluda_ptx_impl__bfe_u32" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%43 = OpTypeFunction %uint %uint %uint %uint
%ulong = OpTypeInt 64 0
%45 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_8 = OpConstant %ulong 8
%34 = OpFunction %uint None %43
%36 = OpFunctionParameter %uint
%37 = OpFunctionParameter %uint
%38 = OpFunctionParameter %uint
OpFunctionEnd
%1 = OpFunction %void None %45
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%33 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
%14 = OpLoad %ulong %4
%29 = OpConvertUToPtr %_ptr_Generic_uint %14
%13 = OpLoad %uint %29 Aligned 4
OpStore %6 %13
%16 = OpLoad %ulong %4
%30 = OpConvertUToPtr %_ptr_Generic_uint %16
%51 = OpBitcast %_ptr_Generic_uchar %30
%52 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %51 %ulong_4
%26 = OpBitcast %_ptr_Generic_uint %52
%15 = OpLoad %uint %26 Aligned 4
OpStore %7 %15
%18 = OpLoad %ulong %4
%31 = OpConvertUToPtr %_ptr_Generic_uint %18
%53 = OpBitcast %_ptr_Generic_uchar %31
%54 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %53 %ulong_8
%28 = OpBitcast %_ptr_Generic_uint %54
%17 = OpLoad %uint %28 Aligned 4
OpStore %8 %17
%20 = OpLoad %uint %6
%21 = OpLoad %uint %7
%22 = OpLoad %uint %8
%19 = OpFunctionCall %uint %34 %20 %21 %22
OpStore %6 %19
%23 = OpLoad %ulong %5
%24 = OpLoad %uint %6
%32 = OpConvertUToPtr %_ptr_Generic_uint %23
OpStore %32 %24 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,90 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%51 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "bfi"
OpDecorate %44 LinkageAttributes "__zluda_ptx_impl__bfi_b32" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%54 = OpTypeFunction %uint %uint %uint %uint %uint
%ulong = OpTypeInt 64 0
%56 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_8 = OpConstant %ulong 8
%ulong_12 = OpConstant %ulong 12
%44 = OpFunction %uint None %54
%46 = OpFunctionParameter %uint
%47 = OpFunctionParameter %uint
%48 = OpFunctionParameter %uint
%49 = OpFunctionParameter %uint
OpFunctionEnd
%1 = OpFunction %void None %56
%10 = OpFunctionParameter %ulong
%11 = OpFunctionParameter %ulong
%43 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
%9 = OpVariable %_ptr_Function_uint Function
OpStore %2 %10
OpStore %3 %11
%12 = OpLoad %ulong %2 Aligned 8
OpStore %4 %12
%13 = OpLoad %ulong %3 Aligned 8
OpStore %5 %13
%15 = OpLoad %ulong %4
%35 = OpConvertUToPtr %_ptr_Generic_uint %15
%14 = OpLoad %uint %35 Aligned 4
OpStore %6 %14
%17 = OpLoad %ulong %4
%36 = OpConvertUToPtr %_ptr_Generic_uint %17
%62 = OpBitcast %_ptr_Generic_uchar %36
%63 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %62 %ulong_4
%30 = OpBitcast %_ptr_Generic_uint %63
%16 = OpLoad %uint %30 Aligned 4
OpStore %7 %16
%19 = OpLoad %ulong %4
%37 = OpConvertUToPtr %_ptr_Generic_uint %19
%64 = OpBitcast %_ptr_Generic_uchar %37
%65 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %64 %ulong_8
%32 = OpBitcast %_ptr_Generic_uint %65
%18 = OpLoad %uint %32 Aligned 4
OpStore %8 %18
%21 = OpLoad %ulong %4
%38 = OpConvertUToPtr %_ptr_Generic_uint %21
%66 = OpBitcast %_ptr_Generic_uchar %38
%67 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %66 %ulong_12
%34 = OpBitcast %_ptr_Generic_uint %67
%20 = OpLoad %uint %34 Aligned 4
OpStore %9 %20
%23 = OpLoad %uint %6
%24 = OpLoad %uint %7
%25 = OpLoad %uint %8
%26 = OpLoad %uint %9
%40 = OpCopyObject %uint %23
%41 = OpCopyObject %uint %24
%39 = OpFunctionCall %uint %44 %40 %41 %25 %26
%22 = OpCopyObject %uint %39
OpStore %6 %22
%27 = OpLoad %ulong %5
%28 = OpLoad %uint %6
%42 = OpConvertUToPtr %_ptr_Generic_uint %27
OpStore %42 %28 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,52 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%27 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "block"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%30 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_1 = OpConstant %ulong 1
%ulong_1_0 = OpConstant %ulong 1
%1 = OpFunction %void None %30
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%25 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
%8 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
%14 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_ulong %14
%13 = OpLoad %ulong %23 Aligned 8
OpStore %6 %13
%16 = OpLoad %ulong %6
%15 = OpIAdd %ulong %16 %ulong_1
OpStore %7 %15
%18 = OpLoad %ulong %8
%17 = OpIAdd %ulong %18 %ulong_1_0
OpStore %8 %17
%19 = OpLoad %ulong %5
%20 = OpLoad %ulong %7
%24 = OpConvertUToPtr %_ptr_Generic_ulong %19
OpStore %24 %20 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,57 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%29 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "bra"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%32 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_1 = OpConstant %ulong 1
%ulong_2 = OpConstant %ulong 2
%1 = OpFunction %void None %32
%11 = OpFunctionParameter %ulong
%12 = OpFunctionParameter %ulong
%27 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
%8 = OpVariable %_ptr_Function_ulong Function
%9 = OpVariable %_ptr_Function_ulong Function
%10 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %11
OpStore %3 %12
%13 = OpLoad %ulong %2 Aligned 8
OpStore %7 %13
%14 = OpLoad %ulong %3 Aligned 8
OpStore %8 %14
%16 = OpLoad %ulong %7
%25 = OpConvertUToPtr %_ptr_Generic_ulong %16
%15 = OpLoad %ulong %25 Aligned 8
OpStore %9 %15
OpBranch %4
%4 = OpLabel
%18 = OpLoad %ulong %9
%17 = OpIAdd %ulong %18 %ulong_1
OpStore %10 %17
OpBranch %6
%35 = OpLabel
%20 = OpLoad %ulong %9
%19 = OpIAdd %ulong %20 %ulong_2
OpStore %10 %19
OpBranch %6
%6 = OpLabel
%21 = OpLoad %ulong %8
%22 = OpLoad %ulong %10
%26 = OpConvertUToPtr %_ptr_Generic_ulong %21
OpStore %26 %22 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,52 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "brev"
OpDecorate %20 LinkageAttributes "__zluda_ptx_impl__brev_b32" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%27 = OpTypeFunction %uint %uint
%ulong = OpTypeInt 64 0
%29 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%20 = OpFunction %uint None %27
%22 = OpFunctionParameter %uint
OpFunctionEnd
%1 = OpFunction %void None %29
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%19 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_uint %12
%11 = OpLoad %uint %17 Aligned 4
OpStore %6 %11
%14 = OpLoad %uint %6
%13 = OpFunctionCall %uint %20 %14
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %uint %6
%18 = OpConvertUToPtr %_ptr_Generic_uint %15
OpStore %18 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,71 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
%37 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %4 "call"
OpExecutionMode %4 ContractionOff
OpDecorate %4 LinkageAttributes "call" Export
OpDecorate %1 LinkageAttributes "incr" Export
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%40 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%44 = OpTypeFunction %void %_ptr_Function_ulong %_ptr_Function_ulong
%ulong_1 = OpConstant %ulong 1
%4 = OpFunction %void None %40
%12 = OpFunctionParameter %ulong
%13 = OpFunctionParameter %ulong
%26 = OpLabel
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
%8 = OpVariable %_ptr_Function_ulong Function
%9 = OpVariable %_ptr_Function_ulong Function
%10 = OpVariable %_ptr_Function_ulong Function
%11 = OpVariable %_ptr_Function_ulong Function
OpStore %5 %12
OpStore %6 %13
%14 = OpLoad %ulong %5 Aligned 8
OpStore %7 %14
%15 = OpLoad %ulong %6 Aligned 8
OpStore %8 %15
%17 = OpLoad %ulong %7
%22 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %17
%16 = OpLoad %ulong %22 Aligned 8
OpStore %9 %16
%18 = OpLoad %ulong %9
%23 = OpBitcast %_ptr_Function_ulong %10
%24 = OpCopyObject %ulong %18
OpStore %23 %24 Aligned 8
%43 = OpFunctionCall %void %1 %10 %11
%19 = OpLoad %ulong %11 Aligned 8
OpStore %9 %19
%20 = OpLoad %ulong %8
%21 = OpLoad %ulong %9
%25 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %20
OpStore %25 %21 Aligned 8
OpReturn
OpFunctionEnd
%1 = OpFunction %void None %44
%28 = OpFunctionParameter %_ptr_Function_ulong
%27 = OpFunctionParameter %_ptr_Function_ulong
%35 = OpLabel
%29 = OpVariable %_ptr_Function_ulong Function
%30 = OpLoad %ulong %28 Aligned 8
OpStore %29 %30
%32 = OpLoad %ulong %29
%31 = OpIAdd %ulong %32 %ulong_1
OpStore %29 %31
%33 = OpLoad %ulong %29
OpStore %27 %33 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,52 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%22 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "clz"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%25 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%1 = OpFunction %void None %25
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%20 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_uint %12
%11 = OpLoad %uint %17 Aligned 4
OpStore %6 %11
%14 = OpLoad %uint %6
%18 = OpExtInst %uint %22 clz %14
%13 = OpCopyObject %uint %18
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %uint %6
%19 = OpConvertUToPtr %_ptr_Generic_uint %15
OpStore %19 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,112 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%53 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %2 "const" %1
OpExecutionMode %2 ContractionOff
OpDecorate %1 Alignment 8
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%ushort = OpTypeInt 16 0
%uint_4 = OpConstant %uint 4
%_arr_ushort_uint_4 = OpTypeArray %ushort %uint_4
%ushort_10 = OpConstant %ushort 10
%ushort_20 = OpConstant %ushort 20
%ushort_30 = OpConstant %ushort 30
%ushort_40 = OpConstant %ushort 40
%63 = OpConstantComposite %_arr_ushort_uint_4 %ushort_10 %ushort_20 %ushort_30 %ushort_40
%uint_4_0 = OpConstant %uint 4
%_ptr_UniformConstant__arr_ushort_uint_4 = OpTypePointer UniformConstant %_arr_ushort_uint_4
%1 = OpVariable %_ptr_UniformConstant__arr_ushort_uint_4 UniformConstant %63
%ulong = OpTypeInt 64 0
%67 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_ushort = OpTypePointer Function %ushort
%_ptr_UniformConstant_ushort = OpTypePointer UniformConstant %ushort
%ulong_2 = OpConstant %ulong 2
%uchar = OpTypeInt 8 0
%_ptr_UniformConstant_uchar = OpTypePointer UniformConstant %uchar
%ulong_4 = OpConstant %ulong 4
%ulong_6 = OpConstant %ulong 6
%_ptr_Generic_ushort = OpTypePointer Generic %ushort
%ulong_2_0 = OpConstant %ulong 2
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_4_0 = OpConstant %ulong 4
%ulong_6_0 = OpConstant %ulong 6
%2 = OpFunction %void None %67
%11 = OpFunctionParameter %ulong
%12 = OpFunctionParameter %ulong
%51 = OpLabel
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ushort Function
%8 = OpVariable %_ptr_Function_ushort Function
%9 = OpVariable %_ptr_Function_ushort Function
%10 = OpVariable %_ptr_Function_ushort Function
OpStore %3 %11
OpStore %4 %12
%13 = OpLoad %ulong %3 Aligned 8
OpStore %5 %13
%14 = OpLoad %ulong %4 Aligned 8
OpStore %6 %14
%39 = OpBitcast %_ptr_UniformConstant_ushort %1
%15 = OpLoad %ushort %39 Aligned 2
OpStore %7 %15
%40 = OpBitcast %_ptr_UniformConstant_ushort %1
%73 = OpBitcast %_ptr_UniformConstant_uchar %40
%74 = OpInBoundsPtrAccessChain %_ptr_UniformConstant_uchar %73 %ulong_2
%28 = OpBitcast %_ptr_UniformConstant_ushort %74
%16 = OpLoad %ushort %28 Aligned 2
OpStore %8 %16
%41 = OpBitcast %_ptr_UniformConstant_ushort %1
%75 = OpBitcast %_ptr_UniformConstant_uchar %41
%76 = OpInBoundsPtrAccessChain %_ptr_UniformConstant_uchar %75 %ulong_4
%30 = OpBitcast %_ptr_UniformConstant_ushort %76
%17 = OpLoad %ushort %30 Aligned 2
OpStore %9 %17
%42 = OpBitcast %_ptr_UniformConstant_ushort %1
%77 = OpBitcast %_ptr_UniformConstant_uchar %42
%78 = OpInBoundsPtrAccessChain %_ptr_UniformConstant_uchar %77 %ulong_6
%32 = OpBitcast %_ptr_UniformConstant_ushort %78
%18 = OpLoad %ushort %32 Aligned 2
OpStore %10 %18
%19 = OpLoad %ulong %6
%20 = OpLoad %ushort %7
%43 = OpConvertUToPtr %_ptr_Generic_ushort %19
%44 = OpCopyObject %ushort %20
OpStore %43 %44 Aligned 2
%21 = OpLoad %ulong %6
%22 = OpLoad %ushort %8
%45 = OpConvertUToPtr %_ptr_Generic_ushort %21
%81 = OpBitcast %_ptr_Generic_uchar %45
%82 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %81 %ulong_2_0
%34 = OpBitcast %_ptr_Generic_ushort %82
%46 = OpCopyObject %ushort %22
OpStore %34 %46 Aligned 2
%23 = OpLoad %ulong %6
%24 = OpLoad %ushort %9
%47 = OpConvertUToPtr %_ptr_Generic_ushort %23
%83 = OpBitcast %_ptr_Generic_uchar %47
%84 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %83 %ulong_4_0
%36 = OpBitcast %_ptr_Generic_ushort %84
%48 = OpCopyObject %ushort %24
OpStore %36 %48 Aligned 2
%25 = OpLoad %ulong %6
%26 = OpLoad %ushort %10
%49 = OpConvertUToPtr %_ptr_Generic_ushort %25
%85 = OpBitcast %_ptr_Generic_uchar %49
%86 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %85 %ulong_6_0
%38 = OpBitcast %_ptr_Generic_ushort %86
%50 = OpCopyObject %ushort %26
OpStore %38 %50 Aligned 2
OpReturn
OpFunctionEnd

View File

@ -1,48 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%22 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "constant_f32"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%25 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%float_0_5 = OpConstant %float 0.5
%1 = OpFunction %void None %25
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%20 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_Generic_float %12
%11 = OpLoad %float %18 Aligned 4
OpStore %6 %11
%14 = OpLoad %float %6
%13 = OpFMul %float %14 %float_0_5
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %float %6
%19 = OpConvertUToPtr %_ptr_Generic_float %15
OpStore %19 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,48 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%22 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "constant_negative"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%25 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%uint_4294967295 = OpConstant %uint 4294967295
%1 = OpFunction %void None %25
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%20 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_Generic_uint %12
%11 = OpLoad %uint %18 Aligned 4
OpStore %6 %11
%14 = OpLoad %uint %6
%13 = OpIMul %uint %14 %uint_4294967295
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %uint %6
%19 = OpConvertUToPtr %_ptr_Generic_uint %15
OpStore %19 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,48 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%21 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cos"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%24 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%1 = OpFunction %void None %24
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%19 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_float %12
%11 = OpLoad %float %17 Aligned 4
OpStore %6 %11
%14 = OpLoad %float %6
%13 = OpExtInst %float %21 cos %14
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %float %6
%18 = OpConvertUToPtr %_ptr_Generic_float %15
OpStore %18 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,55 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%22 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_f64_f32"
OpExecutionMode %1 DenormFlushToZero 16
OpExecutionMode %1 DenormFlushToZero 32
OpExecutionMode %1 DenormFlushToZero 64
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%25 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%double = OpTypeFloat 64
%_ptr_Function_double = OpTypePointer Function %double
%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
%_ptr_Generic_double = OpTypePointer Generic %double
%1 = OpFunction %void None %25
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%20 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_double Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %13
%12 = OpLoad %float %18 Aligned 4
OpStore %6 %12
%15 = OpLoad %float %6
%14 = OpFConvert %double %15
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %double %7
%19 = OpConvertUToPtr %_ptr_Generic_double %16
OpStore %19 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,69 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%34 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_rni"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%37 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_4_0 = OpConstant %ulong 4
%1 = OpFunction %void None %37
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%32 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_float Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%28 = OpConvertUToPtr %_ptr_Generic_float %13
%12 = OpLoad %float %28 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%29 = OpConvertUToPtr %_ptr_Generic_float %15
%44 = OpBitcast %_ptr_Generic_uchar %29
%45 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %44 %ulong_4
%25 = OpBitcast %_ptr_Generic_float %45
%14 = OpLoad %float %25 Aligned 4
OpStore %7 %14
%17 = OpLoad %float %6
%16 = OpExtInst %float %34 rint %17
OpStore %6 %16
%19 = OpLoad %float %7
%18 = OpExtInst %float %34 rint %19
OpStore %7 %18
%20 = OpLoad %ulong %5
%21 = OpLoad %float %6
%30 = OpConvertUToPtr %_ptr_Generic_float %20
OpStore %30 %21 Aligned 4
%22 = OpLoad %ulong %5
%23 = OpLoad %float %7
%31 = OpConvertUToPtr %_ptr_Generic_float %22
%46 = OpBitcast %_ptr_Generic_uchar %31
%47 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %46 %ulong_4_0
%27 = OpBitcast %_ptr_Generic_float %47
OpStore %27 %23 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,69 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%34 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_rzi"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%37 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_4_0 = OpConstant %ulong 4
%1 = OpFunction %void None %37
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%32 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_float Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%28 = OpConvertUToPtr %_ptr_Generic_float %13
%12 = OpLoad %float %28 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%29 = OpConvertUToPtr %_ptr_Generic_float %15
%44 = OpBitcast %_ptr_Generic_uchar %29
%45 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %44 %ulong_4
%25 = OpBitcast %_ptr_Generic_float %45
%14 = OpLoad %float %25 Aligned 4
OpStore %7 %14
%17 = OpLoad %float %6
%16 = OpExtInst %float %34 trunc %17
OpStore %6 %16
%19 = OpLoad %float %7
%18 = OpExtInst %float %34 trunc %19
OpStore %7 %18
%20 = OpLoad %ulong %5
%21 = OpLoad %float %6
%30 = OpConvertUToPtr %_ptr_Generic_float %20
OpStore %30 %21 Aligned 4
%22 = OpLoad %ulong %5
%23 = OpLoad %float %7
%31 = OpConvertUToPtr %_ptr_Generic_float %22
%46 = OpBitcast %_ptr_Generic_uchar %31
%47 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %46 %ulong_4_0
%27 = OpBitcast %_ptr_Generic_float %47
OpStore %27 %23 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,59 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_s16_s8"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%27 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%uchar = OpTypeInt 8 0
%ushort = OpTypeInt 16 0
%_ptr_Generic_uint = OpTypePointer Generic %uint
%1 = OpFunction %void None %27
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%22 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %13
%12 = OpLoad %uint %18 Aligned 4
OpStore %7 %12
%15 = OpLoad %uint %7
%32 = OpBitcast %uint %15
%34 = OpUConvert %uchar %32
%20 = OpCopyObject %uchar %34
%19 = OpSConvert %ushort %20
%14 = OpSConvert %uint %19
OpStore %6 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %uint %6
%21 = OpConvertUToPtr %_ptr_Generic_uint %16
OpStore %21 %17 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,82 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%42 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_s32_f32"
OpDecorate %32 FPRoundingMode RTP
OpDecorate %34 FPRoundingMode RTP
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%45 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%float = OpTypeFloat 32
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%ulong_4_0 = OpConstant %ulong 4
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%1 = OpFunction %void None %45
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%40 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%29 = OpConvertUToPtr %_ptr_Generic_float %13
%28 = OpLoad %float %29 Aligned 4
%12 = OpBitcast %uint %28
OpStore %6 %12
%15 = OpLoad %ulong %4
%30 = OpConvertUToPtr %_ptr_Generic_float %15
%53 = OpBitcast %_ptr_Generic_uchar %30
%54 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %53 %ulong_4
%25 = OpBitcast %_ptr_Generic_float %54
%31 = OpLoad %float %25 Aligned 4
%14 = OpBitcast %uint %31
OpStore %7 %14
%17 = OpLoad %uint %6
%33 = OpBitcast %float %17
%32 = OpConvertFToS %uint %33
%16 = OpCopyObject %uint %32
OpStore %6 %16
%19 = OpLoad %uint %7
%35 = OpBitcast %float %19
%34 = OpConvertFToS %uint %35
%18 = OpCopyObject %uint %34
OpStore %7 %18
%20 = OpLoad %ulong %5
%21 = OpLoad %uint %6
%36 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %20
%37 = OpCopyObject %uint %21
OpStore %36 %37 Aligned 4
%22 = OpLoad %ulong %5
%23 = OpLoad %uint %7
%38 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %22
%57 = OpBitcast %_ptr_CrossWorkgroup_uchar %38
%58 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %57 %ulong_4_0
%27 = OpBitcast %_ptr_CrossWorkgroup_uint %58
%39 = OpCopyObject %uint %23
OpStore %27 %39 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,55 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_s64_s32"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%27 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %27
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%22 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%19 = OpConvertUToPtr %_ptr_Generic_uint %13
%18 = OpLoad %uint %19 Aligned 4
%12 = OpCopyObject %uint %18
OpStore %6 %12
%15 = OpLoad %uint %6
%14 = OpSConvert %ulong %15
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%20 = OpConvertUToPtr %_ptr_Generic_ulong %16
%21 = OpCopyObject %ulong %17
OpStore %20 %21 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,56 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%25 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvt_sat_s_u"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%28 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%1 = OpFunction %void None %28
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%23 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
%14 = OpLoad %ulong %4
%21 = OpConvertUToPtr %_ptr_Generic_uint %14
%13 = OpLoad %uint %21 Aligned 4
OpStore %6 %13
%16 = OpLoad %uint %6
%15 = OpSatConvertSToU %uint %16
OpStore %7 %15
%18 = OpLoad %uint %7
%17 = OpCopyObject %uint %18
OpStore %8 %17
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %8
%22 = OpConvertUToPtr %_ptr_Generic_uint %19
OpStore %22 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,65 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%37 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "cvta"
%void = OpTypeVoid
%uchar = OpTypeInt 8 0
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%41 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_uchar
%_ptr_Function__ptr_CrossWorkgroup_uchar = OpTypePointer Function %_ptr_CrossWorkgroup_uchar
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
%1 = OpFunction %void None %41
%17 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%18 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
%35 = OpLabel
%2 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%3 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%7 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%8 = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uchar Function
%6 = OpVariable %_ptr_Function_float Function
OpStore %2 %17
OpStore %3 %18
%10 = OpBitcast %_ptr_Function_ulong %2
%9 = OpLoad %ulong %10 Aligned 8
%19 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %9
OpStore %7 %19
%12 = OpBitcast %_ptr_Function_ulong %3
%11 = OpLoad %ulong %12 Aligned 8
%20 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %11
OpStore %8 %20
%21 = OpLoad %_ptr_CrossWorkgroup_uchar %7
%14 = OpConvertPtrToU %ulong %21
%30 = OpCopyObject %ulong %14
%29 = OpCopyObject %ulong %30
%13 = OpCopyObject %ulong %29
%22 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %13
OpStore %7 %22
%23 = OpLoad %_ptr_CrossWorkgroup_uchar %8
%16 = OpConvertPtrToU %ulong %23
%32 = OpCopyObject %ulong %16
%31 = OpCopyObject %ulong %32
%15 = OpCopyObject %ulong %31
%24 = OpConvertUToPtr %_ptr_CrossWorkgroup_uchar %15
OpStore %8 %24
%26 = OpLoad %_ptr_CrossWorkgroup_uchar %7
%33 = OpBitcast %_ptr_CrossWorkgroup_float %26
%25 = OpLoad %float %33 Aligned 4
OpStore %6 %25
%27 = OpLoad %_ptr_CrossWorkgroup_uchar %8
%28 = OpLoad %float %6
%34 = OpBitcast %_ptr_CrossWorkgroup_float %27
OpStore %34 %28 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,60 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%28 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "div_approx"
OpDecorate %16 FPFastMathMode AllowRecip
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%31 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %31
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%26 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_float Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_float %13
%12 = OpLoad %float %23 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_float %15
%38 = OpBitcast %_ptr_Generic_uchar %24
%39 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %38 %ulong_4
%22 = OpBitcast %_ptr_Generic_float %39
%14 = OpLoad %float %22 Aligned 4
OpStore %7 %14
%17 = OpLoad %float %6
%18 = OpLoad %float %7
%16 = OpFDiv %float %17 %18
OpStore %6 %16
%19 = OpLoad %ulong %5
%20 = OpLoad %float %6
%25 = OpConvertUToPtr %_ptr_Generic_float %19
OpStore %25 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,48 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%21 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "ex2"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%24 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%1 = OpFunction %void None %24
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%19 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_float %12
%11 = OpLoad %float %17 Aligned 4
OpStore %6 %11
%14 = OpLoad %float %6
%13 = OpExtInst %float %21 exp2 %14
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %float %6
%18 = OpConvertUToPtr %_ptr_Generic_float %15
OpStore %18 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,75 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
%31 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %4 "extern_func"
OpExecutionMode %4 ContractionOff
OpDecorate %1 LinkageAttributes "foobar" Import
OpDecorate %12 Alignment 16
OpDecorate %4 LinkageAttributes "extern_func" Export
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%uchar = OpTypeInt 8 0
%uint_16 = OpConstant %uint 16
%_arr_uchar_uint_16 = OpTypeArray %uchar %uint_16
%_ptr_Function__arr_uchar_uint_16 = OpTypePointer Function %_arr_uchar_uint_16
%40 = OpTypeFunction %void %_ptr_Function_ulong %_ptr_Function__arr_uchar_uint_16
%uint_16_0 = OpConstant %uint 16
%42 = OpTypeFunction %void %ulong %ulong
%uint_16_1 = OpConstant %uint 16
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%ulong_0 = OpConstant %ulong 0
%_ptr_Function_uchar = OpTypePointer Function %uchar
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %40
%3 = OpFunctionParameter %_ptr_Function_ulong
%2 = OpFunctionParameter %_ptr_Function__arr_uchar_uint_16
OpFunctionEnd
%4 = OpFunction %void None %42
%13 = OpFunctionParameter %ulong
%14 = OpFunctionParameter %ulong
%29 = OpLabel
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
%8 = OpVariable %_ptr_Function_ulong Function
%9 = OpVariable %_ptr_Function_ulong Function
%10 = OpVariable %_ptr_Function_ulong Function
%11 = OpVariable %_ptr_Function_ulong Function
%12 = OpVariable %_ptr_Function__arr_uchar_uint_16 Function
OpStore %5 %13
OpStore %6 %14
%15 = OpLoad %ulong %5 Aligned 8
OpStore %7 %15
%16 = OpLoad %ulong %6 Aligned 8
OpStore %8 %16
%18 = OpLoad %ulong %7
%25 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %18
%17 = OpLoad %ulong %25 Aligned 8
OpStore %9 %17
%19 = OpLoad %ulong %9
%46 = OpBitcast %_ptr_Function_uchar %11
%47 = OpInBoundsPtrAccessChain %_ptr_Function_uchar %46 %ulong_0
%24 = OpBitcast %_ptr_Function_ulong %47
%26 = OpCopyObject %ulong %19
OpStore %24 %26 Aligned 8
%48 = OpFunctionCall %void %1 %11 %12
%27 = OpBitcast %_ptr_Function_ulong %12
%20 = OpLoad %ulong %27 Aligned 8
OpStore %10 %20
%21 = OpLoad %ulong %8
%22 = OpLoad %ulong %10
%28 = OpConvertUToPtr %_ptr_Generic_ulong %21
OpStore %28 %22 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,56 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %2 "extern_shared" %1
OpExecutionMode %2 ContractionOff
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
%1 = OpVariable %_ptr_Workgroup_uint Workgroup
%ulong = OpTypeInt 64 0
%29 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%_ptr_Workgroup_ulong = OpTypePointer Workgroup %ulong
%2 = OpFunction %void None %29
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%22 = OpLabel
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %3 %8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%11 = OpLoad %ulong %4 Aligned 8
OpStore %6 %11
%13 = OpLoad %ulong %5
%18 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %13
%12 = OpLoad %ulong %18 Aligned 8
OpStore %7 %12
%14 = OpLoad %ulong %7
%19 = OpBitcast %_ptr_Workgroup_ulong %1
OpStore %19 %14 Aligned 8
%20 = OpBitcast %_ptr_Workgroup_ulong %1
%15 = OpLoad %ulong %20 Aligned 8
OpStore %7 %15
%16 = OpLoad %ulong %6
%17 = OpLoad %ulong %7
%21 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %16
OpStore %21 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,75 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%35 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %12 "extern_shared_call" %1
OpExecutionMode %12 ContractionOff
OpDecorate %1 Alignment 4
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint
%1 = OpVariable %_ptr_Workgroup_uint Workgroup
%39 = OpTypeFunction %void %_ptr_Workgroup_uint
%ulong = OpTypeInt 64 0
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Workgroup_ulong = OpTypePointer Workgroup %ulong
%ulong_2 = OpConstant %ulong 2
%43 = OpTypeFunction %void %ulong %ulong
%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
%2 = OpFunction %void None %39
%34 = OpFunctionParameter %_ptr_Workgroup_uint
%11 = OpLabel
%3 = OpVariable %_ptr_Function_ulong Function
%9 = OpBitcast %_ptr_Workgroup_ulong %34
%4 = OpLoad %ulong %9 Aligned 8
OpStore %3 %4
%6 = OpLoad %ulong %3
%5 = OpIAdd %ulong %6 %ulong_2
OpStore %3 %5
%7 = OpLoad %ulong %3
%10 = OpBitcast %_ptr_Workgroup_ulong %34
OpStore %10 %7 Aligned 8
OpReturn
OpFunctionEnd
%12 = OpFunction %void None %43
%18 = OpFunctionParameter %ulong
%19 = OpFunctionParameter %ulong
%32 = OpLabel
%13 = OpVariable %_ptr_Function_ulong Function
%14 = OpVariable %_ptr_Function_ulong Function
%15 = OpVariable %_ptr_Function_ulong Function
%16 = OpVariable %_ptr_Function_ulong Function
%17 = OpVariable %_ptr_Function_ulong Function
OpStore %13 %18
OpStore %14 %19
%20 = OpLoad %ulong %13 Aligned 8
OpStore %15 %20
%21 = OpLoad %ulong %14 Aligned 8
OpStore %16 %21
%23 = OpLoad %ulong %15
%28 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %23
%22 = OpLoad %ulong %28 Aligned 8
OpStore %17 %22
%24 = OpLoad %ulong %17
%29 = OpBitcast %_ptr_Workgroup_ulong %1
OpStore %29 %24 Aligned 8
%45 = OpFunctionCall %void %2 %1
%30 = OpBitcast %_ptr_Workgroup_ulong %1
%25 = OpLoad %ulong %30 Aligned 8
OpStore %17 %25
%26 = OpLoad %ulong %16
%27 = OpLoad %ulong %17
%31 = OpConvertUToPtr %_ptr_CrossWorkgroup_ulong %26
OpStore %31 %27 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,69 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%35 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "fma"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%38 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_8 = OpConstant %ulong 8
%1 = OpFunction %void None %38
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%33 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_float Function
%8 = OpVariable %_ptr_Function_float Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
%14 = OpLoad %ulong %4
%29 = OpConvertUToPtr %_ptr_Generic_float %14
%13 = OpLoad %float %29 Aligned 4
OpStore %6 %13
%16 = OpLoad %ulong %4
%30 = OpConvertUToPtr %_ptr_Generic_float %16
%45 = OpBitcast %_ptr_Generic_uchar %30
%46 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %45 %ulong_4
%26 = OpBitcast %_ptr_Generic_float %46
%15 = OpLoad %float %26 Aligned 4
OpStore %7 %15
%18 = OpLoad %ulong %4
%31 = OpConvertUToPtr %_ptr_Generic_float %18
%47 = OpBitcast %_ptr_Generic_uchar %31
%48 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %47 %ulong_8
%28 = OpBitcast %_ptr_Generic_float %48
%17 = OpLoad %float %28 Aligned 4
OpStore %8 %17
%20 = OpLoad %float %6
%21 = OpLoad %float %7
%22 = OpLoad %float %8
%19 = OpExtInst %float %35 fma %20 %21 %22
OpStore %6 %19
%23 = OpLoad %ulong %5
%24 = OpLoad %float %6
%32 = OpConvertUToPtr %_ptr_Generic_float %23
OpStore %32 %24 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,77 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
%39 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %12 "func_ptr"
OpExecutionMode %12 ContractionOff
OpDecorate %12 LinkageAttributes "func_ptr" Export
%void = OpTypeVoid
%float = OpTypeFloat 32
%42 = OpTypeFunction %float %float %float
%_ptr_Function_float = OpTypePointer Function %float
%ulong = OpTypeInt 64 0
%45 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_1 = OpConstant %ulong 1
%ulong_0 = OpConstant %ulong 0
%1 = OpFunction %float None %42
%5 = OpFunctionParameter %float
%6 = OpFunctionParameter %float
%11 = OpLabel
%3 = OpVariable %_ptr_Function_float Function
%4 = OpVariable %_ptr_Function_float Function
%2 = OpVariable %_ptr_Function_float Function
OpStore %3 %5
OpStore %4 %6
%8 = OpLoad %float %3
%9 = OpLoad %float %4
%7 = OpFAdd %float %8 %9
OpStore %2 %7
%10 = OpLoad %float %2
OpReturnValue %10
OpFunctionEnd
%12 = OpFunction %void None %45
%20 = OpFunctionParameter %ulong
%21 = OpFunctionParameter %ulong
%37 = OpLabel
%13 = OpVariable %_ptr_Function_ulong Function
%14 = OpVariable %_ptr_Function_ulong Function
%15 = OpVariable %_ptr_Function_ulong Function
%16 = OpVariable %_ptr_Function_ulong Function
%17 = OpVariable %_ptr_Function_ulong Function
%18 = OpVariable %_ptr_Function_ulong Function
%19 = OpVariable %_ptr_Function_ulong Function
OpStore %13 %20
OpStore %14 %21
%22 = OpLoad %ulong %13 Aligned 8
OpStore %15 %22
%23 = OpLoad %ulong %14 Aligned 8
OpStore %16 %23
%25 = OpLoad %ulong %15
%35 = OpConvertUToPtr %_ptr_Generic_ulong %25
%24 = OpLoad %ulong %35 Aligned 8
OpStore %17 %24
%27 = OpLoad %ulong %17
%26 = OpIAdd %ulong %27 %ulong_1
OpStore %18 %26
%28 = OpCopyObject %ulong %ulong_0
OpStore %19 %28
%30 = OpLoad %ulong %18
%31 = OpLoad %ulong %19
%29 = OpIAdd %ulong %30 %31
OpStore %18 %29
%32 = OpLoad %ulong %16
%33 = OpLoad %ulong %18
%36 = OpConvertUToPtr %_ptr_Generic_ulong %32
OpStore %36 %33 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,53 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%21 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %2 "global_array" %1
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%_arr_uint_uint_4 = OpTypeArray %uint %uint_4
%uint_1 = OpConstant %uint 1
%uint_0 = OpConstant %uint 0
%28 = OpConstantComposite %_arr_uint_uint_4 %uint_1 %uint_0 %uint_0 %uint_0
%uint_4_0 = OpConstant %uint 4
%_ptr_CrossWorkgroup__arr_uint_uint_4 = OpTypePointer CrossWorkgroup %_arr_uint_uint_4
%1 = OpVariable %_ptr_CrossWorkgroup__arr_uint_uint_4 CrossWorkgroup %28
%ulong = OpTypeInt 64 0
%32 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%2 = OpFunction %void None %32
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%19 = OpLabel
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %3 %8
OpStore %4 %9
%16 = OpConvertPtrToU %ulong %1
%10 = OpCopyObject %ulong %16
OpStore %5 %10
%11 = OpLoad %ulong %4 Aligned 8
OpStore %6 %11
%13 = OpLoad %ulong %5
%17 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %13
%12 = OpLoad %uint %17 Aligned 4
OpStore %7 %12
%14 = OpLoad %ulong %6
%15 = OpLoad %uint %7
%18 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %14
OpStore %18 %15 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,53 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "implicit_param"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%27 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
%1 = OpFunction %void None %27
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%22 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %13
%12 = OpLoad %float %18 Aligned 4
OpStore %6 %12
%14 = OpLoad %float %6
%19 = OpBitcast %_ptr_Function_float %7
OpStore %19 %14 Aligned 4
%20 = OpBitcast %_ptr_Function_float %7
%15 = OpLoad %float %20 Aligned 4
OpStore %6 %15
%16 = OpLoad %ulong %5
%17 = OpLoad %float %6
%21 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %16
OpStore %21 %17 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,70 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%40 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "lanemask_lt"
OpExecutionMode %1 ContractionOff
OpDecorate %11 LinkageAttributes "__zluda_ptx_impl__sreg_lanemask_lt" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%43 = OpTypeFunction %uint
%ulong = OpTypeInt 64 0
%45 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%uint_1 = OpConstant %uint 1
%11 = OpFunction %uint None %43
OpFunctionEnd
%1 = OpFunction %void None %45
%13 = OpFunctionParameter %ulong
%14 = OpFunctionParameter %ulong
%38 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
OpStore %2 %13
OpStore %3 %14
%15 = OpLoad %ulong %2 Aligned 8
OpStore %4 %15
%16 = OpLoad %ulong %3 Aligned 8
OpStore %5 %16
%18 = OpLoad %ulong %4
%29 = OpConvertUToPtr %_ptr_Generic_uint %18
%28 = OpLoad %uint %29 Aligned 4
%17 = OpCopyObject %uint %28
OpStore %6 %17
%20 = OpLoad %uint %6
%31 = OpCopyObject %uint %20
%30 = OpIAdd %uint %31 %uint_1
%19 = OpCopyObject %uint %30
OpStore %7 %19
%10 = OpFunctionCall %uint %11
%32 = OpCopyObject %uint %10
%21 = OpCopyObject %uint %32
OpStore %8 %21
%23 = OpLoad %uint %7
%24 = OpLoad %uint %8
%34 = OpCopyObject %uint %23
%35 = OpCopyObject %uint %24
%33 = OpIAdd %uint %34 %35
%22 = OpCopyObject %uint %33
OpStore %7 %22
%25 = OpLoad %ulong %5
%26 = OpLoad %uint %7
%36 = OpConvertUToPtr %_ptr_Generic_uint %25
%37 = OpCopyObject %uint %26
OpStore %36 %37 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,42 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%19 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "ld_st"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%22 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %22
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%17 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%15 = OpConvertUToPtr %_ptr_Generic_ulong %12
%11 = OpLoad %ulong %15 Aligned 8
OpStore %6 %11
%13 = OpLoad %ulong %5
%14 = OpLoad %ulong %6
%16 = OpConvertUToPtr %_ptr_Generic_ulong %13
OpStore %16 %14 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,56 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%23 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "ld_st_implicit"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%26 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%ulong_81985529216486895 = OpConstant %ulong 81985529216486895
%float = OpTypeFloat 32
%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
%uint = OpTypeInt 32 0
%1 = OpFunction %void None %26
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%21 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%11 = OpCopyObject %ulong %ulong_81985529216486895
OpStore %6 %11
%13 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %13
%17 = OpLoad %float %18 Aligned 4
%31 = OpBitcast %uint %17
%12 = OpUConvert %ulong %31
OpStore %6 %12
%14 = OpLoad %ulong %5
%15 = OpLoad %ulong %6
%19 = OpConvertUToPtr %_ptr_CrossWorkgroup_float %14
%32 = OpBitcast %ulong %15
%33 = OpUConvert %uint %32
%20 = OpBitcast %float %33
OpStore %19 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,63 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%30 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "ld_st_offset"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%33 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_4_0 = OpConstant %ulong 4
%1 = OpFunction %void None %33
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%28 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_uint %13
%12 = OpLoad %uint %24 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%25 = OpConvertUToPtr %_ptr_Generic_uint %15
%40 = OpBitcast %_ptr_Generic_uchar %25
%41 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %40 %ulong_4
%21 = OpBitcast %_ptr_Generic_uint %41
%14 = OpLoad %uint %21 Aligned 4
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %uint %7
%26 = OpConvertUToPtr %_ptr_Generic_uint %16
OpStore %26 %17 Aligned 4
%18 = OpLoad %ulong %5
%19 = OpLoad %uint %6
%27 = OpConvertUToPtr %_ptr_Generic_uint %18
%42 = OpBitcast %_ptr_Generic_uchar %27
%43 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %42 %ulong_4_0
%23 = OpBitcast %_ptr_Generic_uint %43
OpStore %23 %19 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,48 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%21 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "lg2"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%24 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%1 = OpFunction %void None %24
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%19 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_float %12
%11 = OpLoad %float %17 Aligned 4
OpStore %6 %11
%14 = OpLoad %float %6
%13 = OpExtInst %float %21 log2 %14
OpStore %6 %13
%15 = OpLoad %ulong %5
%16 = OpLoad %float %6
%18 = OpConvertUToPtr %_ptr_Generic_float %15
OpStore %18 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,49 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%20 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "local_align"
OpDecorate %4 Alignment 8
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%23 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%uchar = OpTypeInt 8 0
%uint_8 = OpConstant %uint 8
%_arr_uchar_uint_8 = OpTypeArray %uchar %uint_8
%_ptr_Function__arr_uchar_uint_8 = OpTypePointer Function %_arr_uchar_uint_8
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %23
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%18 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function__arr_uchar_uint_8 Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %5 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %6 %11
%13 = OpLoad %ulong %5
%16 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %16 Aligned 8
OpStore %7 %12
%14 = OpLoad %ulong %6
%15 = OpLoad %ulong %7
%17 = OpConvertUToPtr %_ptr_Generic_ulong %14
OpStore %17 %15 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,87 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%46 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mad_s32"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%49 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%ulong_8 = OpConstant %ulong 8
%ulong_4_0 = OpConstant %ulong 4
%ulong_8_0 = OpConstant %ulong 8
%1 = OpFunction %void None %49
%10 = OpFunctionParameter %ulong
%11 = OpFunctionParameter %ulong
%44 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_uint Function
%9 = OpVariable %_ptr_Function_uint Function
OpStore %2 %10
OpStore %3 %11
%12 = OpLoad %ulong %2 Aligned 8
OpStore %4 %12
%13 = OpLoad %ulong %3 Aligned 8
OpStore %5 %13
%15 = OpLoad %ulong %4
%38 = OpConvertUToPtr %_ptr_Generic_uint %15
%14 = OpLoad %uint %38 Aligned 4
OpStore %7 %14
%17 = OpLoad %ulong %4
%39 = OpConvertUToPtr %_ptr_Generic_uint %17
%56 = OpBitcast %_ptr_Generic_uchar %39
%57 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %56 %ulong_4
%31 = OpBitcast %_ptr_Generic_uint %57
%16 = OpLoad %uint %31 Aligned 4
OpStore %8 %16
%19 = OpLoad %ulong %4
%40 = OpConvertUToPtr %_ptr_Generic_uint %19
%58 = OpBitcast %_ptr_Generic_uchar %40
%59 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %58 %ulong_8
%33 = OpBitcast %_ptr_Generic_uint %59
%18 = OpLoad %uint %33 Aligned 4
OpStore %9 %18
%21 = OpLoad %uint %7
%22 = OpLoad %uint %8
%23 = OpLoad %uint %9
%60 = OpIMul %uint %21 %22
%20 = OpIAdd %uint %23 %60
OpStore %6 %20
%24 = OpLoad %ulong %5
%25 = OpLoad %uint %6
%41 = OpConvertUToPtr %_ptr_Generic_uint %24
OpStore %41 %25 Aligned 4
%26 = OpLoad %ulong %5
%27 = OpLoad %uint %6
%42 = OpConvertUToPtr %_ptr_Generic_uint %26
%61 = OpBitcast %_ptr_Generic_uchar %42
%62 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %61 %ulong_4_0
%35 = OpBitcast %_ptr_Generic_uint %62
OpStore %35 %27 Aligned 4
%28 = OpLoad %ulong %5
%29 = OpLoad %uint %6
%43 = OpConvertUToPtr %_ptr_Generic_uint %28
%63 = OpBitcast %_ptr_Generic_uchar %43
%64 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %63 %ulong_8_0
%37 = OpBitcast %_ptr_Generic_uint %64
OpStore %37 %29 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,59 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%28 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "max"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%31 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %31
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%26 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_uint %13
%12 = OpLoad %uint %23 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_uint %15
%38 = OpBitcast %_ptr_Generic_uchar %24
%39 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %38 %ulong_4
%22 = OpBitcast %_ptr_Generic_uint %39
%14 = OpLoad %uint %22 Aligned 4
OpStore %7 %14
%17 = OpLoad %uint %6
%18 = OpLoad %uint %7
%16 = OpExtInst %uint %28 s_max %17 %18
OpStore %6 %16
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %6
%25 = OpConvertUToPtr %_ptr_Generic_uint %19
OpStore %25 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,49 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%20 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "membar"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%23 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%uint_0 = OpConstant %uint 0
%uint_784 = OpConstant %uint 784
%1 = OpFunction %void None %23
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%18 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
%12 = OpLoad %ulong %4
%16 = OpConvertUToPtr %_ptr_Generic_uint %12
%15 = OpLoad %uint %16 Aligned 4
%11 = OpCopyObject %uint %15
OpStore %6 %11
OpMemoryBarrier %uint_0 %uint_784
%13 = OpLoad %ulong %5
%14 = OpLoad %uint %6
%17 = OpConvertUToPtr %_ptr_Generic_uint %13
OpStore %17 %14 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,59 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%28 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "min"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%31 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %31
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%26 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_uint %13
%12 = OpLoad %uint %23 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_uint %15
%38 = OpBitcast %_ptr_Generic_uchar %24
%39 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %38 %ulong_4
%22 = OpBitcast %_ptr_Generic_uint %39
%14 = OpLoad %uint %22 Aligned 4
OpStore %7 %14
%17 = OpLoad %uint %6
%18 = OpLoad %uint %7
%16 = OpExtInst %uint %28 s_min %17 %18
OpStore %6 %16
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %6
%25 = OpConvertUToPtr %_ptr_Generic_uint %19
OpStore %25 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,31 +1,11 @@
use crate::pass; use crate::pass;
use crate::ptx;
use crate::translate;
use hip_runtime_sys::hipError_t; use hip_runtime_sys::hipError_t;
use rspirv::{
binary::{Assemble, Disassemble},
dr::{Block, Function, Instruction, Loader, Operand},
};
use spirv_headers::Word;
use spirv_tools_sys::{
spv_binary, spv_endianness_t, spv_parsed_instruction_t, spv_result_t, spv_target_env,
};
use std::collections::hash_map::Entry;
use std::error; use std::error;
use std::ffi::{c_void, CStr, CString}; use std::ffi::{CStr, CString};
use std::fmt; use std::fmt;
use std::fmt::{Debug, Display, Formatter}; use std::fmt::{Debug, Display, Formatter};
use std::fs::File;
use std::hash::Hash;
use std::io;
use std::io::Read;
use std::io::Write;
use std::mem; use std::mem;
use std::path::Path; use std::{ptr, str};
use std::process::Command;
use std::slice;
use std::{borrow::Cow, collections::HashMap, env, fs, path::PathBuf, ptr, str};
use tempfile::NamedTempFile;
macro_rules! test_ptx { macro_rules! test_ptx {
($fn_name:ident, $input:expr, $output:expr) => { ($fn_name:ident, $input:expr, $output:expr) => {
@ -65,7 +45,6 @@ test_ptx!(setp_leu, [1f32, f32::NAN], [1f32]);
test_ptx!(bra, [10u64], [11u64]); test_ptx!(bra, [10u64], [11u64]);
test_ptx!(not, [0u64], [u64::max_value()]); test_ptx!(not, [0u64], [u64::max_value()]);
test_ptx!(shl, [11u64], [44u64]); test_ptx!(shl, [11u64], [44u64]);
test_ptx!(shl_link_hack, [11u64], [44u64]);
test_ptx!(cvt_sat_s_u, [-1i32], [0i32]); test_ptx!(cvt_sat_s_u, [-1i32], [0i32]);
test_ptx!(cvta, [3.0f32], [3.0f32]); test_ptx!(cvta, [3.0f32], [3.0f32]);
test_ptx!(block, [1u64], [2u64]); test_ptx!(block, [1u64], [2u64]);
@ -236,7 +215,7 @@ fn test_hip_assert<
output: &mut [Output], output: &mut [Output],
) -> Result<(), Box<dyn error::Error + 'a>> { ) -> Result<(), Box<dyn error::Error + 'a>> {
let ast = ptx_parser::parse_module_checked(ptx_text).unwrap(); let ast = ptx_parser::parse_module_checked(ptx_text).unwrap();
let llvm_ir = pass::to_llvm_module2(ast).unwrap(); let llvm_ir = pass::to_llvm_module(ast).unwrap();
let name = CString::new(name)?; let name = CString::new(name)?;
let result = let result =
run_hip(name.as_c_str(), llvm_ir, input, output).map_err(|err| DisplayError { err })?; run_hip(name.as_c_str(), llvm_ir, input, output).map_err(|err| DisplayError { err })?;
@ -326,6 +305,7 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
let elf_module = comgr::compile_bitcode( let elf_module = comgr::compile_bitcode(
unsafe { CStr::from_ptr(dev_props.gcnArchName.as_ptr()) }, unsafe { CStr::from_ptr(dev_props.gcnArchName.as_ptr()) },
&*module.llvm_ir, &*module.llvm_ir,
module.linked_bitcode(),
) )
.unwrap(); .unwrap();
let mut module = ptr::null_mut(); let mut module = ptr::null_mut();
@ -381,226 +361,3 @@ fn run_hip<Input: From<u8> + Copy + Debug, Output: From<u8> + Copy + Debug + Def
} }
Ok(result) Ok(result)
} }
/// Tracks a claimed bijection between two id spaces (here: SPIR-V result ids
/// coming from two independently emitted modules).
///
/// `is_equal(a, b)` succeeds only if `a` and `b` are paired with each other
/// and with nothing else, so two modules compare equal exactly when one can be
/// obtained from the other by a consistent renaming of ids.
//
// Note: trait bounds live on the impl block only; putting them on the struct
// definition is redundant and non-idiomatic.
struct EqMap<T> {
    // Forward mapping: id from the first module -> its partner in the second.
    m1: HashMap<T, T>,
    // Reverse mapping: id from the second module -> its partner in the first.
    m2: HashMap<T, T>,
}

impl<T: Copy + Eq + Hash> EqMap<T> {
    /// Creates an empty bijection.
    fn new() -> Self {
        EqMap {
            m1: HashMap::new(),
            m2: HashMap::new(),
        }
    }

    /// Returns `true` iff pairing `t1` with `t2` is consistent with every
    /// pairing recorded so far, recording the new pair when both are unseen.
    fn is_equal(&mut self, t1: T, t2: T) -> bool {
        match (self.m1.entry(t1), self.m2.entry(t2)) {
            // Both already paired: equal only if paired with each other.
            (Entry::Occupied(entry1), Entry::Occupied(entry2)) => {
                *entry1.get() == t2 && *entry2.get() == t1
            }
            // Neither seen before: record the pairing in both directions.
            (Entry::Vacant(entry1), Entry::Vacant(entry2)) => {
                entry1.insert(t2);
                entry2.insert(t1);
                true
            }
            // One side is already paired with a different value.
            _ => false,
        }
    }
}
/// Compares two lists of SPIR-V functions pairwise; equal only if they have
/// the same length and every corresponding pair is structurally equal.
fn is_spirv_fns_equal(fns1: &[Function], fns2: &[Function]) -> bool {
    fns1.len() == fns2.len()
        && fns1
            .iter()
            .zip(fns2.iter())
            .all(|(f1, f2)| is_spirv_fn_equal(f1, f2))
}
/// Structural equality of two SPIR-V functions modulo a consistent renaming
/// of ids. A fresh id bijection is built per function pair, since ids only
/// need to be consistent within a single comparison.
fn is_spirv_fn_equal(fn1: &Function, fn2: &Function) -> bool {
    let mut map = EqMap::new();
    if !is_option_equal(&fn1.def, &fn2.def, &mut map, is_instr_equal)
        || !is_option_equal(&fn1.end, &fn2.end, &mut map, is_instr_equal)
        || fn1.parameters.len() != fn2.parameters.len()
    {
        return false;
    }
    let params_match = fn1
        .parameters
        .iter()
        .zip(fn2.parameters.iter())
        .all(|(p1, p2)| is_instr_equal(p1, p2, &mut map));
    if !params_match || fn1.blocks.len() != fn2.blocks.len() {
        return false;
    }
    fn1.blocks
        .iter()
        .zip(fn2.blocks.iter())
        .all(|(b1, b2)| is_block_equal(b1, b2, &mut map))
}
/// Structural equality of two basic blocks under the id bijection in `map`:
/// matching labels, same instruction count, and pairwise-equal instructions.
fn is_block_equal(b1: &Block, b2: &Block, map: &mut EqMap<Word>) -> bool {
    if !is_option_equal(&b1.label, &b2.label, map, is_instr_equal) {
        return false;
    }
    b1.instructions.len() == b2.instructions.len()
        && b1
            .instructions
            .iter()
            .zip(b2.instructions.iter())
            .all(|(i1, i2)| is_instr_equal(i1, i2, map))
}
/// Structural equality of two instructions: identical opcode, result type/id
/// equal under the id bijection, and pairwise-equal operands. Id-carrying
/// operands go through the bijection; all other operands must match exactly.
fn is_instr_equal(instr1: &Instruction, instr2: &Instruction, map: &mut EqMap<Word>) -> bool {
    if instr1.class.opcode != instr2.class.opcode {
        return false;
    }
    if !is_option_equal(&instr1.result_type, &instr2.result_type, map, is_word_equal)
        || !is_option_equal(&instr1.result_id, &instr2.result_id, map, is_word_equal)
    {
        return false;
    }
    if instr1.operands.len() != instr2.operands.len() {
        return false;
    }
    for (o1, o2) in instr1.operands.iter().zip(instr2.operands.iter()) {
        let operands_equal = match (o1, o2) {
            // Operands that reference ids are compared through the bijection.
            (Operand::IdMemorySemantics(w1), Operand::IdMemorySemantics(w2))
            | (Operand::IdScope(w1), Operand::IdScope(w2))
            | (Operand::IdRef(w1), Operand::IdRef(w2)) => is_word_equal(w1, w2, map),
            // Literals, enums, strings etc. must be byte-identical.
            (o1, o2) => o1 == o2,
        };
        if !operands_equal {
            return false;
        }
    }
    true
}
/// Two ids are equal iff pairing them is consistent with every pairing
/// already recorded in `map` (records the pair when both ids are new).
fn is_word_equal(t1: &Word, t2: &Word, map: &mut EqMap<Word>) -> bool {
    map.is_equal(*t1, *t2)
}
/// Lifts a comparison function over `Option`s: both `Some` delegates to `f`,
/// both `None` is trivially equal, and a `Some`/`None` mix panics — mismatched
/// presence indicates a malformed comparison, not a legitimate inequality.
fn is_option_equal<T, F: FnOnce(&T, &T, &mut EqMap<Word>) -> bool>(
    o1: &Option<T>,
    o2: &Option<T>,
    map: &mut EqMap<Word>,
    f: F,
) -> bool {
    match (o1.as_ref(), o2.as_ref()) {
        (Some(t1), Some(t2)) => f(t1, t2, map),
        (None, None) => true,
        _ => panic!(),
    }
}
/// spirv-tools header callback: appends the parsed SPIR-V header words
/// (magic, version, generator, id bound, reserved) to the `Vec<u32>` that the
/// caller smuggled through `user_data`. Big-endian modules are rejected.
///
/// # Safety
/// `user_data` must point to a live `Vec<u32>` for the duration of the call.
unsafe extern "C" fn parse_header_cb(
    user_data: *mut c_void,
    endian: spv_endianness_t,
    magic: u32,
    version: u32,
    generator: u32,
    id_bound: u32,
    reserved: u32,
) -> spv_result_t {
    if endian == spv_endianness_t::SPV_ENDIANNESS_BIG {
        return spv_result_t::SPV_UNSUPPORTED;
    }
    // SAFETY: per the function contract, `user_data` is a valid, exclusive
    // pointer to a `Vec<u32>`. A plain pointer cast is used instead of
    // `mem::transmute` — it expresses the intent and lets the compiler check
    // the pointee type.
    let result_vec = &mut *(user_data as *mut Vec<u32>);
    result_vec.extend_from_slice(&[magic, version, generator, id_bound, reserved]);
    spv_result_t::SPV_SUCCESS
}
/// spirv-tools instruction callback: appends the raw words of one parsed
/// instruction to the `Vec<u32>` passed through `user_data`.
///
/// # Safety
/// `user_data` must point to a live `Vec<u32>`, and `inst` (including its
/// `words`/`num_words` buffer) must be valid for the duration of the call.
unsafe extern "C" fn parse_instruction_cb(
    user_data: *mut c_void,
    inst: *const spv_parsed_instruction_t,
) -> spv_result_t {
    let inst = &*inst;
    // SAFETY: per the function contract, `user_data` is a valid, exclusive
    // pointer to a `Vec<u32>`; cast instead of `mem::transmute` for clarity.
    let result_vec = &mut *(user_data as *mut Vec<u32>);
    // SAFETY: spirv-tools guarantees `words` points to `num_words` valid u32s
    // while this callback runs. Bulk-copy instead of pushing word by word.
    let words = std::slice::from_raw_parts(inst.words, inst.num_words as usize);
    result_vec.extend_from_slice(words);
    spv_result_t::SPV_SUCCESS
}
// NOTE(review): developer-machine path — only valid on the original author's
// box; confirm before relying on the llvm-spirv code path.
const LLVM_SPIRV: &str = "/home/vosen/amd/llvm-project/build/bin/llvm-spirv";
/// Root of the ROCm installation.
const AMDGPU: &str = "/opt/rocm/";
/// Target triple for AMD GPU offload code.
const AMDGPU_TARGET: &str = "amdgcn-amd-amdhsa";
/// Device-library bitcode files linked into every compiled module.
// `'static` is implied on consts, so the explicit lifetime was redundant.
const AMDGPU_BITCODE: [&str; 8] = [
    "opencl.bc",
    "ocml.bc",
    "ockl.bc",
    "oclc_correctly_rounded_sqrt_off.bc",
    "oclc_daz_opt_on.bc",
    "oclc_finite_only_off.bc",
    "oclc_unsafe_math_off.bc",
    "oclc_wavefrontsize64_off.bc",
];
/// Prefix of the per-ISA bitcode file, e.g. `oclc_isa_version_1030.bc`.
const AMDGPU_BITCODE_DEVICE_PREFIX: &str = "oclc_isa_version_";
/// Copies `path` into `/tmp/zluda` (created if missing) under its original
/// file name, so intermediate artifacts survive for post-mortem inspection.
///
/// Panics if `path` has no final component (e.g. ends in `..`).
fn persist_file(path: &Path) -> io::Result<()> {
    let target_dir = PathBuf::from("/tmp/zluda");
    std::fs::create_dir_all(&target_dir)?;
    let target = target_dir.join(path.file_name().unwrap());
    std::fs::copy(path, target)?;
    Ok(())
}
/// Yields the full paths of all device-library bitcode files to link for
/// `device_name`: the generic libraries from `AMDGPU_BITCODE` followed by the
/// per-ISA file derived from the device name.
///
/// The ISA digits are taken from `device_name[3..]` up to an optional `:`
/// feature suffix — i.e. the name is assumed to start with a 3-char `gfx`
/// prefix; shorter names will panic on the slice.
fn get_bitcode_paths(device_name: &str) -> impl Iterator<Item = PathBuf> {
    // Builds `<rocm>/amdgcn/bitcode`; captureless, so freely copyable.
    let bitcode_dir = || {
        let mut dir = PathBuf::from(AMDGPU);
        dir.push("amdgcn");
        dir.push("bitcode");
        dir
    };
    let generic_paths = AMDGPU_BITCODE.iter().map(move |file| {
        let mut path = bitcode_dir();
        path.push(file);
        path
    });
    // Strip a trailing ":sramecc+:xnack-"-style feature list if present.
    let suffix = device_name.find(':').unwrap_or(device_name.len());
    let mut device_path = bitcode_dir();
    device_path.push(format!(
        "{}{}{}",
        AMDGPU_BITCODE_DEVICE_PREFIX,
        &device_name[3..suffix],
        ".bc"
    ));
    generic_paths.chain(std::iter::once(device_path))
}

View File

@ -1,46 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%22 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mov"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%25 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %25
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%20 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %18 Aligned 8
OpStore %6 %12
%15 = OpLoad %ulong %6
%14 = OpCopyObject %ulong %15
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%19 = OpConvertUToPtr %_ptr_Generic_ulong %16
OpStore %19 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,33 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int64
OpCapability Int8
%12 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mov_address"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%15 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uchar = OpTypeInt 8 0
%uint = OpTypeInt 32 0
%uint_8 = OpConstant %uint 8
%_arr_uchar_uint_8 = OpTypeArray %uchar %uint_8
%_ptr_Function__arr_uchar_uint_8 = OpTypePointer Function %_arr_uchar_uint_8
%1 = OpFunction %void None %15
%6 = OpFunctionParameter %ulong
%7 = OpFunctionParameter %ulong
%10 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function__arr_uchar_uint_8 Function
%5 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %6
OpStore %3 %7
%9 = OpConvertPtrToU %ulong %4
%8 = OpCopyObject %ulong %9
OpStore %5 %8
OpReturn
OpFunctionEnd

View File

@ -1,59 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%28 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mul_ftz"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%31 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %31
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%26 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_float Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_float %13
%12 = OpLoad %float %23 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_float %15
%38 = OpBitcast %_ptr_Generic_uchar %24
%39 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %38 %ulong_4
%22 = OpBitcast %_ptr_Generic_float %39
%14 = OpLoad %float %22 Aligned 4
OpStore %7 %14
%17 = OpLoad %float %6
%18 = OpLoad %float %7
%16 = OpFMul %float %17 %18
OpStore %6 %16
%19 = OpLoad %ulong %5
%20 = OpLoad %float %6
%25 = OpConvertUToPtr %_ptr_Generic_float %19
OpStore %25 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,47 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%23 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mul_hi"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%26 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_2 = OpConstant %ulong 2
%1 = OpFunction %void None %26
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%21 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%19 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %19 Aligned 8
OpStore %6 %12
%15 = OpLoad %ulong %6
%14 = OpExtInst %ulong %23 u_mul_hi %15 %ulong_2
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%20 = OpConvertUToPtr %_ptr_Generic_ulong %16
OpStore %20 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,47 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%23 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mul_lo"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%26 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_2 = OpConstant %ulong 2
%1 = OpFunction %void None %26
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%21 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%19 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %19 Aligned 8
OpStore %6 %12
%15 = OpLoad %ulong %6
%14 = OpIMul %ulong %15 %ulong_2
OpStore %7 %14
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%20 = OpConvertUToPtr %_ptr_Generic_ulong %16
OpStore %20 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,59 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%28 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mul_non_ftz"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%31 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%float = OpTypeFloat 32
%_ptr_Function_float = OpTypePointer Function %float
%_ptr_Generic_float = OpTypePointer Generic %float
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %31
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%26 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_float Function
%7 = OpVariable %_ptr_Function_float Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_float %13
%12 = OpLoad %float %23 Aligned 4
OpStore %6 %12
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_float %15
%38 = OpBitcast %_ptr_Generic_uchar %24
%39 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %38 %ulong_4
%22 = OpBitcast %_ptr_Generic_float %39
%14 = OpLoad %float %22 Aligned 4
OpStore %7 %14
%17 = OpLoad %float %6
%18 = OpLoad %float %7
%16 = OpFMul %float %17 %18
OpStore %6 %16
%19 = OpLoad %ulong %5
%20 = OpLoad %float %6
%25 = OpConvertUToPtr %_ptr_Generic_float %19
OpStore %25 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,66 +0,0 @@
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%30 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "mul_wide"
OpExecutionMode %1 ContractionOff
OpDecorate %17 NoSignedWrap
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%33 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%ulong_4 = OpConstant %ulong 4
%uchar = OpTypeInt 8 0
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %33
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%28 = OpLabel
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
%8 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
%14 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %14
%13 = OpLoad %uint %24 Aligned 4
OpStore %6 %13
%16 = OpLoad %ulong %4
%25 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %16
%40 = OpBitcast %_ptr_CrossWorkgroup_uchar %25
%41 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %40 %ulong_4
%23 = OpBitcast %_ptr_CrossWorkgroup_uint %41
%15 = OpLoad %uint %23 Aligned 4
OpStore %7 %15
%18 = OpLoad %uint %6
%19 = OpLoad %uint %7
%42 = OpSConvert %ulong %18
%43 = OpSConvert %ulong %19
%17 = OpIMul %ulong %42 %43
OpStore %8 %17
%20 = OpLoad %ulong %5
%21 = OpLoad %ulong %8
%26 = OpConvertUToPtr %_ptr_Generic_ulong %20
%27 = OpCopyObject %ulong %21
OpStore %26 %27 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,47 +0,0 @@
; Disassembled SPIR-V for the ZLUDA PTX test kernel "neg".
; Signature: kernel(ulong in_addr, ulong out_addr).
; Loads a 32-bit integer from *in_addr, computes its two's-complement
; negation (OpSNegate), and stores the result to *out_addr.
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%21 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "neg"
; Types and pointer types used by the kernel body.
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%24 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%1 = OpFunction %void None %24
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%19 = OpLabel
; Function-storage slots mirroring the PTX parameters/registers.
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
; Spill the two parameters, then reload them: %4 holds the input
; address, %5 the output address.
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
; Load the 32-bit operand through the input address.
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_uint %12
%11 = OpLoad %uint %17 Aligned 4
OpStore %6 %11
; Negate in place.
%14 = OpLoad %uint %6
%13 = OpSNegate %uint %14
OpStore %6 %13
; Write the result through the output address.
%15 = OpLoad %ulong %5
%16 = OpLoad %uint %6
%18 = OpConvertUToPtr %_ptr_Generic_uint %15
OpStore %18 %16 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,60 +0,0 @@
; Disassembled SPIR-V for the ZLUDA PTX test kernel "non_scalar_ptr_offset".
; Signature: kernel(ulong in_addr, ulong out_addr).
; Loads a uint2 vector from CrossWorkgroup memory at byte offset 8 past
; in_addr, adds its two components, and stores the 32-bit sum to *out_addr.
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%27 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "non_scalar_ptr_offset"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%30 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%ulong_8 = OpConstant %ulong 8
%v2uint = OpTypeVector %uint 2
%_ptr_CrossWorkgroup_v2uint = OpTypePointer CrossWorkgroup %v2uint
%uchar = OpTypeInt 8 0
%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
%1 = OpFunction %void None %30
%9 = OpFunctionParameter %ulong
%10 = OpFunctionParameter %ulong
%25 = OpLabel
; Function-storage slots: %4 input address, %5 output address,
; %6/%7 the two vector components.
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %9
OpStore %3 %10
%11 = OpLoad %ulong %2 Aligned 8
OpStore %4 %11
%12 = OpLoad %ulong %3 Aligned 8
OpStore %5 %12
; Byte-offset pointer arithmetic: bitcast the v2uint pointer to a
; uchar pointer, advance 8 bytes, bitcast back, then load the vector.
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_CrossWorkgroup_v2uint %13
%38 = OpBitcast %_ptr_CrossWorkgroup_uchar %23
%39 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uchar %38 %ulong_8
%22 = OpBitcast %_ptr_CrossWorkgroup_v2uint %39
%8 = OpLoad %v2uint %22 Aligned 8
; Split the vector into scalar components.
%14 = OpCompositeExtract %uint %8 0
%15 = OpCompositeExtract %uint %8 1
OpStore %6 %14
OpStore %7 %15
; Sum the components.
%17 = OpLoad %uint %6
%18 = OpLoad %uint %7
%16 = OpIAdd %uint %17 %18
OpStore %6 %16
; Store the sum through the output address.
%19 = OpLoad %ulong %5
%20 = OpLoad %uint %6
%24 = OpConvertUToPtr %_ptr_CrossWorkgroup_uint %19
OpStore %24 %20 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,48 +0,0 @@
; Disassembled SPIR-V for the ZLUDA PTX test kernel "not".
; Signature: kernel(ulong in_addr, ulong out_addr).
; Loads a 64-bit integer from *in_addr, computes its bitwise complement
; (OpNot), and stores the result to *out_addr.
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%24 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "not"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%27 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%1 = OpFunction %void None %27
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%22 = OpLabel
; Function-storage slots: %4 input address, %5 output address,
; %6 operand, %7 result.
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
; Load the 64-bit operand through the input address.
%13 = OpLoad %ulong %4
%18 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %18 Aligned 8
OpStore %6 %12
; Bitwise complement; the surrounding OpCopyObject instructions are
; identity moves left by the PTX-to-SPIR-V translation.
%15 = OpLoad %ulong %6
%20 = OpCopyObject %ulong %15
%19 = OpNot %ulong %20
%14 = OpCopyObject %ulong %19
OpStore %7 %14
; Store the result through the output address.
%16 = OpLoad %ulong %5
%17 = OpLoad %ulong %7
%21 = OpConvertUToPtr %_ptr_Generic_ulong %16
OpStore %21 %17 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,60 +0,0 @@
; Disassembled SPIR-V for the ZLUDA PTX test kernel "ntid".
; Signature: kernel(ulong in_addr, ulong out_addr).
; Loads a 32-bit integer from *in_addr, adds the value returned by the
; imported helper __zluda_ptx_impl__sreg_ntid (called with argument 0 —
; presumably the x dimension of the ntid special register; confirm
; against the ZLUDA runtime implementation), and stores the sum to
; *out_addr.
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%30 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "ntid"
OpExecutionMode %1 ContractionOff
; %11 is resolved at link time against the ZLUDA PTX runtime library.
OpDecorate %11 LinkageAttributes "__zluda_ptx_impl__sreg_ntid" Import
%void = OpTypeVoid
%uint = OpTypeInt 32 0
%uchar = OpTypeInt 8 0
%34 = OpTypeFunction %uint %uchar
%ulong = OpTypeInt 64 0
%36 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%uchar_0 = OpConstant %uchar 0
; Bodyless declaration of the imported helper: uint(uchar).
%11 = OpFunction %uint None %34
%13 = OpFunctionParameter %uchar
OpFunctionEnd
%1 = OpFunction %void None %36
%14 = OpFunctionParameter %ulong
%15 = OpFunctionParameter %ulong
%28 = OpLabel
; Function-storage slots: %4 input address, %5 output address,
; %6 loaded operand (and result), %7 the helper's return value.
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
%7 = OpVariable %_ptr_Function_uint Function
OpStore %2 %14
OpStore %3 %15
%16 = OpLoad %ulong %2 Aligned 8
OpStore %4 %16
%17 = OpLoad %ulong %3 Aligned 8
OpStore %5 %17
; Load the 32-bit operand through the input address.
%19 = OpLoad %ulong %4
%26 = OpConvertUToPtr %_ptr_Generic_uint %19
%18 = OpLoad %uint %26 Aligned 4
OpStore %6 %18
; Query the imported special-register helper with dimension index 0.
%10 = OpFunctionCall %uint %11 %uchar_0
%20 = OpCopyObject %uint %10
OpStore %7 %20
; Add the helper's result to the loaded value.
%22 = OpLoad %uint %6
%23 = OpLoad %uint %7
%21 = OpIAdd %uint %22 %23
OpStore %6 %21
; Store the sum through the output address.
%24 = OpLoad %ulong %5
%25 = OpLoad %uint %6
%27 = OpConvertUToPtr %_ptr_Generic_uint %24
OpStore %27 %25 Aligned 4
OpReturn
OpFunctionEnd

View File

@ -1,60 +0,0 @@
; Disassembled SPIR-V for the ZLUDA PTX test kernel "or".
; Signature: kernel(ulong in_addr, ulong out_addr).
; Loads two 64-bit integers from *in_addr and *(in_addr + 8 bytes),
; computes their bitwise OR, and stores the result to *out_addr.
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
%31 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "or"
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%34 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%_ptr_Generic_ulong = OpTypePointer Generic %ulong
%ulong_8 = OpConstant %ulong 8
%uchar = OpTypeInt 8 0
%_ptr_Generic_uchar = OpTypePointer Generic %uchar
%1 = OpFunction %void None %34
%8 = OpFunctionParameter %ulong
%9 = OpFunctionParameter %ulong
%29 = OpLabel
; Function-storage slots: %4 input address, %5 output address,
; %6 first operand (and result), %7 second operand.
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_ulong Function
%7 = OpVariable %_ptr_Function_ulong Function
OpStore %2 %8
OpStore %3 %9
%10 = OpLoad %ulong %2 Aligned 8
OpStore %4 %10
%11 = OpLoad %ulong %3 Aligned 8
OpStore %5 %11
; First operand: load 64 bits at the input address.
%13 = OpLoad %ulong %4
%23 = OpConvertUToPtr %_ptr_Generic_ulong %13
%12 = OpLoad %ulong %23 Aligned 8
OpStore %6 %12
; Second operand: byte-offset arithmetic (bitcast to uchar pointer,
; advance 8 bytes, bitcast back) then load 64 bits.
%15 = OpLoad %ulong %4
%24 = OpConvertUToPtr %_ptr_Generic_ulong %15
%39 = OpBitcast %_ptr_Generic_uchar %24
%40 = OpInBoundsPtrAccessChain %_ptr_Generic_uchar %39 %ulong_8
%22 = OpBitcast %_ptr_Generic_ulong %40
%14 = OpLoad %ulong %22 Aligned 8
OpStore %7 %14
; Bitwise OR; the OpCopyObject instructions are identity moves left
; by the PTX-to-SPIR-V translation.
%17 = OpLoad %ulong %6
%18 = OpLoad %ulong %7
%26 = OpCopyObject %ulong %17
%27 = OpCopyObject %ulong %18
%25 = OpBitwiseOr %ulong %26 %27
%16 = OpCopyObject %ulong %25
OpStore %6 %16
; Store the result through the output address.
%19 = OpLoad %ulong %5
%20 = OpLoad %ulong %6
%28 = OpConvertUToPtr %_ptr_Generic_ulong %19
OpStore %28 %20 Aligned 8
OpReturn
OpFunctionEnd

View File

@ -1,52 +0,0 @@
; Disassembled SPIR-V for the ZLUDA PTX test kernel "popc".
; Signature: kernel(ulong in_addr, ulong out_addr).
; Loads a 32-bit integer from *in_addr, counts its set bits
; (OpBitCount, the PTX popc instruction), and stores the count to
; *out_addr.
OpCapability GenericPointer
OpCapability Linkage
OpCapability Addresses
OpCapability Kernel
OpCapability Int8
OpCapability Int16
OpCapability Int64
OpCapability Float16
OpCapability Float64
OpCapability DenormFlushToZero
OpExtension "SPV_KHR_float_controls"
OpExtension "SPV_KHR_no_integer_wrap_decoration"
%22 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %1 "popc"
OpExecutionMode %1 ContractionOff
%void = OpTypeVoid
%ulong = OpTypeInt 64 0
%25 = OpTypeFunction %void %ulong %ulong
%_ptr_Function_ulong = OpTypePointer Function %ulong
%uint = OpTypeInt 32 0
%_ptr_Function_uint = OpTypePointer Function %uint
%_ptr_Generic_uint = OpTypePointer Generic %uint
%1 = OpFunction %void None %25
%7 = OpFunctionParameter %ulong
%8 = OpFunctionParameter %ulong
%20 = OpLabel
; Function-storage slots: %4 input address, %5 output address,
; %6 operand/result.
%2 = OpVariable %_ptr_Function_ulong Function
%3 = OpVariable %_ptr_Function_ulong Function
%4 = OpVariable %_ptr_Function_ulong Function
%5 = OpVariable %_ptr_Function_ulong Function
%6 = OpVariable %_ptr_Function_uint Function
OpStore %2 %7
OpStore %3 %8
%9 = OpLoad %ulong %2 Aligned 8
OpStore %4 %9
%10 = OpLoad %ulong %3 Aligned 8
OpStore %5 %10
; Load the 32-bit operand through the input address.
%12 = OpLoad %ulong %4
%17 = OpConvertUToPtr %_ptr_Generic_uint %12
%11 = OpLoad %uint %17 Aligned 4
OpStore %6 %11
; Population count, written back to the same slot.
%14 = OpLoad %uint %6
%18 = OpBitCount %uint %14
%13 = OpCopyObject %uint %18
OpStore %6 %13
; Store the count through the output address.
%15 = OpLoad %ulong %5
%16 = OpLoad %uint %6
%19 = OpConvertUToPtr %_ptr_Generic_uint %15
OpStore %19 %16 Aligned 4
OpReturn
OpFunctionEnd

Some files were not shown because too many files have changed in this diff Show More