mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-20 08:29:05 +03:00
Compare commits
183 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
7cdab7abc2 | ||
![]() |
d704e92c97 | ||
![]() |
867e4728d5 | ||
![]() |
646d746e02 | ||
![]() |
df5a96d935 | ||
![]() |
9c0747a5f7 | ||
![]() |
fee20e54d9 | ||
![]() |
7399132d5d | ||
![]() |
ecd61a8e2a | ||
![]() |
de870db1f1 | ||
![]() |
7ac67a89e9 | ||
![]() |
7a6df9dcbf | ||
![]() |
870fed4bb6 | ||
![]() |
970ba5aa25 | ||
![]() |
b4cb3ade63 | ||
![]() |
3870a96592 | ||
![]() |
1a63ef62b7 | ||
![]() |
7b2ecdd725 | ||
![]() |
c92abba2bb | ||
![]() |
46def3e7e0 | ||
![]() |
193eb29be8 | ||
![]() |
872054ae40 | ||
![]() |
90a1f77891 | ||
![]() |
164c172236 | ||
![]() |
2753d956df | ||
![]() |
c869a0d611 | ||
![]() |
9923a36b76 | ||
![]() |
89bc40618b | ||
![]() |
07aa1103aa | ||
![]() |
6f76c8b34c | ||
![]() |
2e56871643 | ||
![]() |
869efbe0e2 | ||
![]() |
9390db962b | ||
![]() |
bdcef897cc | ||
![]() |
971951bc9e | ||
![]() |
0ca14d740f | ||
![]() |
7ba1586d6c | ||
![]() |
04dbafaf4a | ||
![]() |
50e793e869 | ||
![]() |
dd915688bd | ||
![]() |
2c6d7ffb7a | ||
![]() |
26bf0eeaf2 | ||
![]() |
400feaf015 | ||
![]() |
fd1c13560f | ||
![]() |
3558a0a65c | ||
![]() |
0104814ac3 | ||
![]() |
a125b0746f | ||
![]() |
2cb5960a18 | ||
![]() |
24e100cb9c | ||
![]() |
e459086c5b | ||
![]() |
dd7ced8b37 | ||
![]() |
816365e7df | ||
![]() |
0172dc58e5 | ||
![]() |
b763415006 | ||
![]() |
c23be576e8 | ||
![]() |
370c0bd09e | ||
![]() |
9609f86033 | ||
![]() |
afe9120868 | ||
![]() |
04a411fe22 | ||
![]() |
ccf3c02ac1 | ||
![]() |
3de01b3f8b | ||
![]() |
d5a4b068dd | ||
![]() |
6ef19d6501 | ||
![]() |
5b2352723f | ||
![]() |
c37223fe67 | ||
![]() |
62ce1fd3a9 | ||
![]() |
04394dbb04 | ||
![]() |
314e3dcb49 | ||
![]() |
ca0d8ec666 | ||
![]() |
467782b1d0 | ||
![]() |
2cd0fcb650 | ||
![]() |
986fa49097 | ||
![]() |
dbb6f09ffa | ||
![]() |
e248a2c9a9 | ||
![]() |
2f951fa04c | ||
![]() |
5290190727 | ||
![]() |
ab67cd46fc | ||
![]() |
da9cf4d583 | ||
![]() |
a27d1e119f | ||
![]() |
e2fbdf7d7b | ||
![]() |
18245be7d5 | ||
![]() |
82510ce8fd | ||
![]() |
a71cd44104 | ||
![]() |
a63f004540 | ||
![]() |
5ec18f14a1 | ||
![]() |
4ae7feb93a | ||
![]() |
9631a8d242 | ||
![]() |
e2432d0df1 | ||
![]() |
043172bd9b | ||
![]() |
5969e59aae | ||
![]() |
4b4ba90219 | ||
![]() |
44decaf396 | ||
![]() |
407664600a | ||
![]() |
3ce6aee65d | ||
![]() |
3070d983ab | ||
![]() |
fefdd528d5 | ||
![]() |
20c9aa4f02 | ||
![]() |
8d79c5d0f8 | ||
![]() |
a610136e17 | ||
![]() |
479014a783 | ||
![]() |
5bfc2a56b9 | ||
![]() |
becda31524 | ||
![]() |
638786b0ec | ||
![]() |
b4de21fbc5 | ||
![]() |
4a71fefb8a | ||
![]() |
8f68287b18 | ||
![]() |
9d4f26bd07 | ||
![]() |
3d2024bf62 | ||
![]() |
58fb8a234c | ||
![]() |
2c6bee4955 | ||
![]() |
1897d33916 | ||
![]() |
5b593ec185 | ||
![]() |
ce25035051 | ||
![]() |
ecc33f7b10 | ||
![]() |
d76ffd691c | ||
![]() |
b460e359ae | ||
![]() |
ad2059872a | ||
![]() |
e328ecc550 | ||
![]() |
7d4fbedfcf | ||
![]() |
b2a455e12e | ||
![]() |
196242b410 | ||
![]() |
55fbe1abb5 | ||
![]() |
e89b0c5d9c | ||
![]() |
d7d38256e0 | ||
![]() |
b2765370e5 | ||
![]() |
015d23b5ad | ||
![]() |
1c0deca9e4 | ||
![]() |
23306e944b | ||
![]() |
23874efe68 | ||
![]() |
8ef6c3d8b6 | ||
![]() |
2e8716bf0d | ||
![]() |
9a568e2969 | ||
![]() |
e018de83ae | ||
![]() |
951c7558cc | ||
![]() |
2198862e76 | ||
![]() |
f0771e1fb6 | ||
![]() |
994cfb3386 | ||
![]() |
9ad88ac982 | ||
![]() |
e940b9400f | ||
![]() |
491e71e346 | ||
![]() |
83ba70bf37 | ||
![]() |
90960fd923 | ||
![]() |
f70abd065b | ||
![]() |
2e6f7e3fdc | ||
![]() |
3d9a79c41e | ||
![]() |
4091f658b2 | ||
![]() |
8d74c16c86 | ||
![]() |
2fc7af0434 | ||
![]() |
e40785aa74 | ||
![]() |
58a7fe53c6 | ||
![]() |
b3c73689a8 | ||
![]() |
89e72e4e95 | ||
![]() |
dca4c5bd21 | ||
![]() |
82b5cef0bd | ||
![]() |
425edfcdd4 | ||
![]() |
7f051ad20e | ||
![]() |
9d92a6e284 | ||
![]() |
d51aaaf552 | ||
![]() |
a55c851eaa | ||
![]() |
8cd3db6648 | ||
![]() |
4d04fe251d | ||
![]() |
a0baad9456 | ||
![]() |
a005c92c61 | ||
![]() |
fedf88180a | ||
![]() |
96f95d59ce | ||
![]() |
a39dda67d1 | ||
![]() |
8393dbd6e9 | ||
![]() |
9dcfb45aa2 | ||
![]() |
94af72f46b | ||
![]() |
15f465041d | ||
![]() |
17291019e3 | ||
![]() |
efd91e270c | ||
![]() |
cdac38d572 | ||
![]() |
648035a01a | ||
![]() |
178ec59af6 | ||
![]() |
d3cd2dc8b4 | ||
![]() |
eec55d9d02 | ||
![]() |
06a5cff2d8 | ||
![]() |
088ff760de | ||
![]() |
ba83bb28f7 | ||
![]() |
b7ee6d66c3 | ||
![]() |
871b8d1bef | ||
![]() |
bfae2e0d21 |
@ -1,2 +1,2 @@
|
||||
[target."x86_64-pc-windows-gnu"]
|
||||
rustflags = ["-C", "link-self-contained=y"]
|
||||
[alias]
|
||||
xtask = "run --package xtask --"
|
52
.devcontainer/Dockerfile
Normal file
52
.devcontainer/Dockerfile
Normal file
@ -0,0 +1,52 @@
|
||||
FROM nvidia/cuda:12.4.1-base-ubuntu22.04
|
||||
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
wget \
|
||||
build-essential \
|
||||
cmake \
|
||||
ninja-build \
|
||||
python3 \
|
||||
ripgrep \
|
||||
git \
|
||||
ltrace \
|
||||
# required by llvm 17
|
||||
lsb-release software-properties-common gnupg
|
||||
|
||||
ARG LLVM_VERSION=17
|
||||
RUN wget https://apt.llvm.org/llvm.sh && \
|
||||
chmod +x llvm.sh && \
|
||||
./llvm.sh ${LLVM_VERSION}
|
||||
|
||||
# Feel free to change to a newer version if you have a newer version on your host
|
||||
ARG CUDA_PKG_VERSION=12-4
|
||||
# Docker <-> host driver version compatibility is newer host <-> older docker
|
||||
# We don't care about a specific driver version, so pick oldest 5XX
|
||||
ARG CUDA_DRIVER=515
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
# CUDA headers need it for interop
|
||||
libgl-dev libegl-dev libvdpau-dev \
|
||||
nvidia-utils-${CUDA_DRIVER} \
|
||||
cuda-cudart-dev-${CUDA_PKG_VERSION} \
|
||||
cuda-nvml-dev-${CUDA_PKG_VERSION} \
|
||||
cuda-cudart-${CUDA_PKG_VERSION} \
|
||||
cuda-profiler-api-${CUDA_PKG_VERSION} \
|
||||
cuda-nvcc-${CUDA_PKG_VERSION}
|
||||
|
||||
ARG ROCM_VERSION=6.3.1
|
||||
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
|
||||
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} jammy main" > /etc/apt/sources.list.d/rocm.list && \
|
||||
echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
rocminfo \
|
||||
rocm-gdb \
|
||||
rocm-smi-lib \
|
||||
rocm-llvm-dev \
|
||||
hip-runtime-amd \
|
||||
hip-dev && \
|
||||
echo '/opt/rocm/lib' > /etc/ld.so.conf.d/rocm.conf && \
|
||||
ldconfig
|
||||
|
||||
ENV PATH=$PATH:/opt/rocm-${ROCM_VERSION}/bin
|
||||
|
34
.devcontainer/devcontainer.json
Normal file
34
.devcontainer/devcontainer.json
Normal file
@ -0,0 +1,34 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/rust
|
||||
{
|
||||
"name": "zluda",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
"securityOpt": [ "seccomp=unconfined" ],
|
||||
"runArgs": [
|
||||
"--runtime=nvidia",
|
||||
"--device=/dev/kfd",
|
||||
"--device=/dev/dri",
|
||||
"--group-add=video"
|
||||
],
|
||||
"mounts": [
|
||||
{
|
||||
"source": "${localEnv:HOME}/.cargo/",
|
||||
"target": "/root/.cargo",
|
||||
"type": "bind"
|
||||
}
|
||||
],
|
||||
// https://containers.dev/features.
|
||||
"features": {
|
||||
"ghcr.io/devcontainers/features/rust:1": {}
|
||||
},
|
||||
// https://aka.ms/dev-containers-non-root.
|
||||
"remoteUser": "root",
|
||||
//"hostRequirements": { "gpu": "optional" }
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [ "mhutchie.git-graph" ]
|
||||
}
|
||||
}
|
||||
}
|
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
ext/** linguist-vendored
|
58
.github/workflows/rust.yml
vendored
58
.github/workflows/rust.yml
vendored
@ -1,58 +0,0 @@
|
||||
name: Rust
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
jobs:
|
||||
build_lin:
|
||||
name: Build and publish (Linux)
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- name: Install GPU drivers
|
||||
run: |
|
||||
sudo apt-get install -y gpg-agent wget
|
||||
wget -qO - https://repositories.intel.com/graphics/intel-graphics.key | sudo apt-key add -
|
||||
sudo apt-add-repository 'deb [arch=amd64] https://repositories.intel.com/graphics/ubuntu focal main'
|
||||
sudo apt-get update
|
||||
sudo apt-get install intel-opencl-icd intel-level-zero-gpu level-zero intel-media-va-driver-non-free libmfx1 libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev ocl-icd-opencl-dev
|
||||
- name: Build
|
||||
run: cargo build --workspace --verbose --release
|
||||
- name: Rename to libcuda.so
|
||||
run: |
|
||||
mv target/release/libnvcuda.so target/release/libcuda.so
|
||||
ln -s libcuda.so target/release/libcuda.so.1
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: Linux
|
||||
path: |
|
||||
target/release/libcuda.so
|
||||
target/release/libcuda.so.1
|
||||
build_win:
|
||||
name: Build and publish (Windows)
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
- name: Build
|
||||
run: cargo build --workspace --verbose --release
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: Windows
|
||||
path: |
|
||||
target/release/nvcuda.dll
|
||||
target/release/zluda_redirect.dll
|
||||
target/release/zluda_with.exe
|
||||
target/release/zluda_dump.dll
|
||||
# TODO(take-cheeze): Support testing
|
||||
# - name: Run tests
|
||||
# run: cargo test --verbose
|
12
.gitmodules
vendored
12
.gitmodules
vendored
@ -1,7 +1,5 @@
|
||||
[submodule "ext/spirv-tools"]
|
||||
path = ext/spirv-tools
|
||||
url = https://github.com/KhronosGroup/SPIRV-Tools
|
||||
branch = master
|
||||
[submodule "ext/spirv-headers"]
|
||||
path = ext/spirv-headers
|
||||
url = https://github.com/KhronosGroup/SPIRV-Headers
|
||||
[submodule "ext/llvm-project"]
|
||||
path = ext/llvm-project
|
||||
url = https://github.com/llvm/llvm-project
|
||||
branch = release/17.x
|
||||
shallow = true
|
||||
|
@ -1,61 +0,0 @@
|
||||
# Dependencies
|
||||
|
||||
Development builds of ZLUDA require the following dependencies:
|
||||
|
||||
* CMake
|
||||
* Python 3
|
||||
|
||||
Additionally, the repository has to be cloned with Git submodules initialized. If you cloned the repo without initializing submodules, do this:
|
||||
```
|
||||
git submodule update --init --recursive
|
||||
```
|
||||
|
||||
# Tests
|
||||
|
||||
Tests should be executed with `--workspace` option to test non-default targets:
|
||||
```
|
||||
cargo test --workspace
|
||||
```
|
||||
|
||||
# Debugging
|
||||
|
||||
## Debugging CUDA applications
|
||||
|
||||
When running an application with ZLUDA quite often you will run into subtle bugs or incompatibilities in the generated GPU code. The best way to debug an application's GPU CUDA code is to use ZLUDA dumper.
|
||||
|
||||
Library `zluda_dump` can be injected into a CUDA application and produce a trace which, for every launched GPU function contains:
|
||||
* PTX source
|
||||
* Launch arguments (block size, grid size, shared memory size)
|
||||
* Dump of function arguments. Both after and before
|
||||
|
||||
Example use with GeekBench:
|
||||
```
|
||||
set ZLUDA_DUMP_KERNEL=knn_match
|
||||
set ZLUDA_DUMP_DIR=C:\temp\zluda_dump
|
||||
"<ZLUDA_PATH>\zluda_with.exe" "<ZLUDA_PATH>\zluda_dump.dll" -- "geekbench_x86_64.exe" --compute CUDA
|
||||
```
|
||||
|
||||
The example above, for every execution of GPU function `knn_match`, will save its details into the directory `C:\temp\zluda_dump`
|
||||
|
||||
This dump can be replayed with `replay.py` script from `zluda_dump` source directory. Use it like this:
|
||||
```
|
||||
python replay.py "C:\temp\zluda_dump\geekbench_x86_64.exe"
|
||||
```
|
||||
You must copy (or symlink) ZLUDA `nvcuda.dll` into PyCUDA directory, so it will run using ZLUDA. Example output:
|
||||
```
|
||||
Intel(R) Graphics [0x3e92] [github.com/vosen/ZLUDA]
|
||||
C:\temp\zluda_dump\geekbench_x86_64.exe\4140_scale_pyramid
|
||||
C:\temp\zluda_dump\geekbench_x86_64.exe\4345_convolve_1d_vertical_grayscale
|
||||
Skipping, launch block size (512) bigger than maximum block size (256)
|
||||
C:\temp\zluda_dump\geekbench_x86_64.exe\4480_scale_pyramid
|
||||
6:
|
||||
Arrays are not equal
|
||||
|
||||
Mismatched elements: 1200 / 19989588 (0.006%)
|
||||
Max absolute difference: 255
|
||||
Max relative difference: 255.
|
||||
x: array([ 7, 6, 8, ..., 193, 195, 193], dtype=uint8)
|
||||
y: array([ 7, 6, 8, ..., 193, 195, 193], dtype=uint8)
|
||||
```
|
||||
From this output one can observe that in kernel launch 4480, 6th argument to function `scale_pyramid` differs between what was executed on an NVIDIA GPU using CUDA and Intel GPU using ZLUDA.
|
||||
__Important__: It's impossible to infer what was the type (and semantics) of argument passed to a GPU function. At our level it's a buffer of bytes and by default `replay.py` simply checks if two buffers are byte-equal. That means you will have a ton of false negatives when running `replay.py`. You should override them for your particular case in `replay.py` - it already contains some overrides for GeekBench kernels
|
1395
Cargo.lock
generated
Normal file
1395
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
53
Cargo.toml
53
Cargo.toml
@ -1,20 +1,33 @@
|
||||
[workspace]
|
||||
|
||||
members = [
|
||||
"detours-sys",
|
||||
"level_zero-sys",
|
||||
"level_zero",
|
||||
"spirv_tools-sys",
|
||||
"zluda",
|
||||
"zluda_dump",
|
||||
"zluda_lib",
|
||||
"zluda_inject",
|
||||
"zluda_redirect",
|
||||
"ptx",
|
||||
]
|
||||
|
||||
default-members = ["zluda_lib", "zluda_inject", "zluda_redirect"]
|
||||
|
||||
[patch.crates-io]
|
||||
rspirv = { git = 'https://github.com/vosen/rspirv', rev = '40f5aa4dedb0d9f1ec24bdd8b6019e01996d1d74' }
|
||||
spirv_headers = { git = 'https://github.com/vosen/rspirv', rev = '40f5aa4dedb0d9f1ec24bdd8b6019e01996d1d74' }
|
||||
[workspace]
|
||||
|
||||
resolver = "2"
|
||||
|
||||
members = [
|
||||
"ext/hip_runtime-sys",
|
||||
"ext/amd_comgr-sys",
|
||||
"comgr",
|
||||
"cuda_base",
|
||||
"cuda_types",
|
||||
"detours-sys",
|
||||
"zluda",
|
||||
"zluda_dump",
|
||||
"zluda_inject",
|
||||
"zluda_redirect",
|
||||
"zluda_ml",
|
||||
"ptx",
|
||||
"ptx_parser",
|
||||
"ptx_parser_macros",
|
||||
"ptx_parser_macros_impl",
|
||||
"xtask",
|
||||
"zluda_bindgen",
|
||||
]
|
||||
|
||||
default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect"]
|
||||
|
||||
[profile.release-lto]
|
||||
inherits = "release"
|
||||
codegen-units = 1
|
||||
lto = true
|
||||
|
||||
[profile.dev.package.xtask]
|
||||
opt-level = 2
|
||||
|
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 259 KiB |
113
README.md
113
README.md
@ -1,87 +1,72 @@
|
||||
[](https://discord.gg/sg6BNzXuc7)
|
||||
|
||||
# ZLUDA
|
||||
|
||||
ZLUDA is a drop-in replacement for CUDA on Intel GPU. ZLUDA allows to run unmodified CUDA applications using Intel GPUs with near-native performance (more below). It works with current integrated Intel UHD GPUs and will work with future Intel Xe GPUs
|
||||
ZLUDA is a drop-in replacement for CUDA on non-NVIDIA GPU. ZLUDA allows to run unmodified CUDA applications using non-NVIDIA GPUs with near-native performance.
|
||||
|
||||
## Performance
|
||||
ZLUDA supports AMD Radeon RX 5000 series and newer GPUs (both desktop and integrated).
|
||||
|
||||
ZLUDA performance has been measured with GeekBench 5.2.3 on Intel UHD 630.\
|
||||
One measurement has been done using OpenCL and another measurement has been done using CUDA with Intel GPU masquerading as a (relatively slow) NVIDIA GPU with the help of ZLUDA. Both measurements use the same GPU.
|
||||
|
||||
Performance below is normalized to OpenCL performance. 110% means that ZLUDA-implemented CUDA is 10% faster on Intel UHD 630.
|
||||
|
||||

|
||||
|
||||
[ZLUDA - detailed results on Geekbench.com](https://browser.geekbench.com/v5/compute/2305009)
|
||||
|
||||
[OpenCL - detailed results on Geekbench.com](https://browser.geekbench.com/v5/compute/2304997)
|
||||
|
||||
Overall, ZLUDA is slower in GeekBench by roughly 2%.
|
||||
|
||||
### Explanation of the results
|
||||
* Why is ZLUDA faster in some benchmarks?\
|
||||
This has not been precisely pinpointed to one thing or another but it's likely a combination of things:
|
||||
* ZLUDA uses [Level 0](https://spec.oneapi.com/level-zero/latest/index.html), which in general is a more low level, high performance API than OpenCL
|
||||
* Tying to the previous point, currently ZLUDA does not support asynchronous execution. This gives us an unfair advantage in a benchmark like GeekBench. GeekBench exclusively uses CUDA synchronous APIs
|
||||
* There is a set of GPU instructions which are available on both NVIDIA hardware and Intel hardware, but are not exposed through OpenCL. We are comparing NVIDIA GPU optimized code with the more general OpenCL code. It's a lucky coincidence (and a credit to the underlying Intel Graphics Compiler) that this code also works well on an Intel GPU
|
||||
* Why is OpenCL faster in Canny and Horizon Detection?\
|
||||
Authors of CUDA benchmarks used CUDA functions `atomicInc` and `atomicDec` which have direct hardware support on NVIDIA cards, but no hardware support on Intel cards. They have to be emulated in software, which limits performance
|
||||
* Why is ZLUDA slower in the remaining benchmarks?\
|
||||
The reason is unknown. Most likely, in some tests we compile from suboptimal NVIDIA GPU code and in other tests ZLUDA itself is emitting suboptimal Intel GPU code. For example, SFFT used to be even slower before PR [#22](https://github.com/vosen/ZLUDA/pull/22)
|
||||
|
||||
|
||||
## Details
|
||||
|
||||
* Is ZLUDA a drop-in replacement for CUDA?\
|
||||
Yes, but certain applications use CUDA in ways which make it incompatible with ZLUDA
|
||||
* What is the status of the project?\
|
||||
This project is a Proof of Concept. About the only thing that works currently is Geekbench. It's amazingly buggy and incomplete. You should not rely on it for anything serious
|
||||
* Is it an Intel project? Is it an NVIDIA project?\
|
||||
No, it's a private project
|
||||
* What is the performance?\
|
||||
Performance can be close to the performance of similarly written OpenCL code (see GeekBench results in the previous section). NVIDIA GPUs and Intel GPUs have different architecture and feature set. Consequently, certain NVIDIA features have to be emulated in ZLUDA with performance penalty. Additionally, performance of ZLUDA will be always lower than the performance of code specifically optimized for Intel GPUs
|
||||
* How it's different from AMD HIP or Intel DPC++ Compatibility toolkit?\
|
||||
Both are porting toolkits which require programmer's effort to port applications to the API in question. With ZLUDA existing applications "just work" on an Intel GPU (if you are lucky and ZLUDA supports the particular subset of CUDA)
|
||||
* Which Intel GPU are supported?\
|
||||
Intel Gen9 and newer (Skylake and newer) which are supported by Intel Level 0
|
||||
* Does ZLUDA support AMD GPUs?\
|
||||
Certainly not currently, but it might be technically possible
|
||||

|
||||
|
||||
ZLUDA is work in progress. Follow development here and say hi on [Discord](https://discord.gg/sg6BNzXuc7). For more details see the announcement: https://vosen.github.io/ZLUDA/blog/zludas-third-life/
|
||||
|
||||
## Usage
|
||||
**Warning**: this is a very incomplete proof of concept. It's probably not going to work with your application. ZLUDA currently works only with applications which use CUDA Driver API or statically-linked CUDA Runtime API - dynamically-linked CUDA Runtime API is not supported at all
|
||||
**Warning**: This version of ZLUDA is under heavy development (more [here](https://vosen.github.io/ZLUDA/blog/zludas-third-life/)) and right now only supports Geekbench. ZLUDA probably will not work with your application just yet.
|
||||
|
||||
### Windows
|
||||
You should have the most recent Intel GPU drivers installed.\
|
||||
Run your application like this:
|
||||
```
|
||||
<ZLUDA_DIRECTORY>\zluda_with.exe -- <APPLICATION> <APPLICATIONS_ARGUMENTS>
|
||||
```
|
||||
You should have recent AMD GPU driver ("AMD Software: Adrenalin Edition") installed.\
|
||||
To run your application you should either:
|
||||
* (Recommended approach) Copy ZLUDA-provided `nvcuda.dll` and `nvml.dll` from `target\release` (if built from sources) or `zluda` (if downloaded a zip package) into a path which your application uses to load CUDA. Paths vary application to application, but usually it's the directory where the .exe file is located
|
||||
* Use ZLUDA launcher like below. ZLUDA launcher is known to be buggy and incomplete:
|
||||
```
|
||||
<ZLUDA_DIRECTORY>\zluda_with.exe -- <APPLICATION> <APPLICATIONS_ARGUMENTS>
|
||||
```
|
||||
|
||||
### Linux
|
||||
You should install most recent run-time driver packages as outlined here: https://dgpu-docs.intel.com/installation-guides/index.html.
|
||||
|
||||
Run your application like this:
|
||||
```
|
||||
LD_LIBRARY_PATH=<ZLUDA_DIRECTORY> <APPLICATION> <APPLICATIONS_ARGUMENTS>
|
||||
```
|
||||
|
||||
## Building
|
||||
You should have a relatively recent version of Rust installed, then you just do:
|
||||
where `<ZLUDA_DIRECTORY>` is the directory which contains ZLUDA-provided `libcuda.so`: `target/release` if you built from sources or `zluda` if you downloaded prebuilt package.
|
||||
|
||||
```
|
||||
cargo build --release
|
||||
```
|
||||
in the main directory of the project.
|
||||
### Linux
|
||||
You should install the most recent run-time and developer driver packages as outlined here: https://dgpu-docs.intel.com/installation-guides/index.html. Additionally, you should have `ocl-icd-opencl-dev` (or equivalent) installed.
|
||||
If you are building on Linux you must also symlink (or rename) the ZLUDA output binaries after ZLUDA build finishes:
|
||||
```
|
||||
ln -s libnvcuda.so target/release/libcuda.so
|
||||
ln -s libcuda.so target/release/libcuda.so.1
|
||||
```
|
||||
### MacOS
|
||||
|
||||
Not supported
|
||||
|
||||
## Building
|
||||
|
||||
### Dependencies
|
||||
|
||||
* Git
|
||||
* CMake
|
||||
* Python 3
|
||||
* Rust compiler (recent version)
|
||||
* C++ compiler
|
||||
* (Optional, but recommended) [Ninja build system](https://ninja-build.org/)
|
||||
|
||||
### Build steps
|
||||
|
||||
* Git clone the repo (make sure to use `--recursive` option to fetch submodules):
|
||||
`git clone --recursive https://github.com/vosen/ZLUDA.git`
|
||||
* Enter freshly cloned `ZLUDA` directory and build with cargo (this takes a while):
|
||||
`cargo xtask --release`
|
||||
|
||||
## Contributing
|
||||
|
||||
If you want to develop ZLUDA itself, read [CONTRIBUTING.md](CONTRIBUTING.md), it contains instructions how to set up dependencies and run tests
|
||||
ZLUDA project has a commercial backing and _does not_ accept donations.
|
||||
ZLUDA project accepts pull requests and other non-monetary contributions.
|
||||
|
||||
If you want to contribute a code fix or documentation update feel free to open a Pull Request.
|
||||
|
||||
### Getting started
|
||||
|
||||
There's no architecture document (yet). The two most important crates in ZLUDA are `ptx` (PTX compiler) and `zluda` (AMD GPU runtime). A good starting point for tinkering with the project is to run one of the `ptx` unit tests under a debugger and understand what it is doing. `cargo test -p ptx -- ::add_hip` is a simple test that adds two numbers.
|
||||
|
||||
GitHub issues tagged with ["help wanted"](https://github.com/vosen/ZLUDA/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) are tasks that are self-contained. Their level of difficulty varies and they are not always good beginner tasks, but they are defined unambiguously.
|
||||
|
||||
If you have questions feel free to ask on [#devtalk channel on Discord](https://discord.com/channels/1273316903783497778/1303329281409159270).
|
||||
|
||||
|
||||
## License
|
||||
|
10
comgr/Cargo.toml
Normal file
10
comgr/Cargo.toml
Normal file
@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "comgr"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
amd_comgr-sys = { path = "../ext/amd_comgr-sys" }
|
186
comgr/src/lib.rs
Normal file
186
comgr/src/lib.rs
Normal file
@ -0,0 +1,186 @@
|
||||
use amd_comgr_sys::*;
|
||||
use std::{ffi::CStr, mem, ptr};
|
||||
|
||||
struct Data(amd_comgr_data_t);
|
||||
|
||||
impl Data {
|
||||
fn new(
|
||||
kind: amd_comgr_data_kind_t,
|
||||
name: &CStr,
|
||||
content: &[u8],
|
||||
) -> Result<Self, amd_comgr_status_s> {
|
||||
let mut data = unsafe { mem::zeroed() };
|
||||
unsafe { amd_comgr_create_data(kind, &mut data) }?;
|
||||
unsafe { amd_comgr_set_data_name(data, name.as_ptr()) }?;
|
||||
unsafe { amd_comgr_set_data(data, content.len(), content.as_ptr().cast()) }?;
|
||||
Ok(Self(data))
|
||||
}
|
||||
|
||||
fn get(&self) -> amd_comgr_data_t {
|
||||
self.0
|
||||
}
|
||||
|
||||
fn copy_content(&self) -> Result<Vec<u8>, amd_comgr_status_s> {
|
||||
let mut size = unsafe { mem::zeroed() };
|
||||
unsafe { amd_comgr_get_data(self.get(), &mut size, ptr::null_mut()) }?;
|
||||
let mut result: Vec<u8> = Vec::with_capacity(size);
|
||||
unsafe { result.set_len(size) };
|
||||
unsafe { amd_comgr_get_data(self.get(), &mut size, result.as_mut_ptr().cast()) }?;
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
struct DataSet(amd_comgr_data_set_t);
|
||||
|
||||
impl DataSet {
|
||||
fn new() -> Result<Self, amd_comgr_status_s> {
|
||||
let mut data_set = unsafe { mem::zeroed() };
|
||||
unsafe { amd_comgr_create_data_set(&mut data_set) }?;
|
||||
Ok(Self(data_set))
|
||||
}
|
||||
|
||||
fn add(&self, data: &Data) -> Result<(), amd_comgr_status_s> {
|
||||
unsafe { amd_comgr_data_set_add(self.get(), data.get()) }
|
||||
}
|
||||
|
||||
fn get(&self) -> amd_comgr_data_set_t {
|
||||
self.0
|
||||
}
|
||||
|
||||
fn get_data(
|
||||
&self,
|
||||
kind: amd_comgr_data_kind_t,
|
||||
index: usize,
|
||||
) -> Result<Data, amd_comgr_status_s> {
|
||||
let mut data = unsafe { mem::zeroed() };
|
||||
unsafe { amd_comgr_action_data_get_data(self.get(), kind, index, &mut data) }?;
|
||||
Ok(Data(data))
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DataSet {
|
||||
fn drop(&mut self) {
|
||||
unsafe { amd_comgr_destroy_data_set(self.get()).ok() };
|
||||
}
|
||||
}
|
||||
|
||||
struct ActionInfo(amd_comgr_action_info_t);
|
||||
|
||||
impl ActionInfo {
|
||||
fn new() -> Result<Self, amd_comgr_status_s> {
|
||||
let mut action = unsafe { mem::zeroed() };
|
||||
unsafe { amd_comgr_create_action_info(&mut action) }?;
|
||||
Ok(Self(action))
|
||||
}
|
||||
|
||||
fn set_isa_name(&self, isa: &CStr) -> Result<(), amd_comgr_status_s> {
|
||||
let mut full_isa = "amdgcn-amd-amdhsa--".to_string().into_bytes();
|
||||
full_isa.extend(isa.to_bytes_with_nul());
|
||||
unsafe { amd_comgr_action_info_set_isa_name(self.get(), full_isa.as_ptr().cast()) }
|
||||
}
|
||||
|
||||
fn set_language(&self, language: amd_comgr_language_t) -> Result<(), amd_comgr_status_s> {
|
||||
unsafe { amd_comgr_action_info_set_language(self.get(), language) }
|
||||
}
|
||||
|
||||
fn set_options<'a>(
|
||||
&self,
|
||||
options: impl Iterator<Item = &'a CStr>,
|
||||
) -> Result<(), amd_comgr_status_s> {
|
||||
let options = options.map(|x| x.as_ptr()).collect::<Vec<_>>();
|
||||
unsafe {
|
||||
amd_comgr_action_info_set_option_list(
|
||||
self.get(),
|
||||
options.as_ptr().cast_mut(),
|
||||
options.len(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&self) -> amd_comgr_action_info_t {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ActionInfo {
|
||||
fn drop(&mut self) {
|
||||
unsafe { amd_comgr_destroy_action_info(self.get()).ok() };
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compile_bitcode(
|
||||
gcn_arch: &CStr,
|
||||
main_buffer: &[u8],
|
||||
ptx_impl: &[u8],
|
||||
) -> Result<Vec<u8>, amd_comgr_status_s> {
|
||||
use amd_comgr_sys::*;
|
||||
let bitcode_data_set = DataSet::new()?;
|
||||
let main_bitcode_data = Data::new(
|
||||
amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_BC,
|
||||
c"zluda.bc",
|
||||
main_buffer,
|
||||
)?;
|
||||
bitcode_data_set.add(&main_bitcode_data)?;
|
||||
let stdlib_bitcode_data = Data::new(
|
||||
amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_BC,
|
||||
c"ptx_impl.bc",
|
||||
ptx_impl,
|
||||
)?;
|
||||
bitcode_data_set.add(&stdlib_bitcode_data)?;
|
||||
let linking_info = ActionInfo::new()?;
|
||||
let linked_data_set = do_action(
|
||||
&bitcode_data_set,
|
||||
&linking_info,
|
||||
amd_comgr_action_kind_t::AMD_COMGR_ACTION_LINK_BC_TO_BC,
|
||||
)?;
|
||||
let compile_to_exec = ActionInfo::new()?;
|
||||
compile_to_exec.set_isa_name(gcn_arch)?;
|
||||
compile_to_exec.set_language(amd_comgr_language_t::AMD_COMGR_LANGUAGE_LLVM_IR)?;
|
||||
let common_options = [
|
||||
// This makes no sense, but it makes ockl linking work
|
||||
c"-Xclang",
|
||||
c"-mno-link-builtin-bitcode-postopt",
|
||||
// Otherwise LLVM omits dynamic fp mode for ockl functions during linking
|
||||
// and then fails to inline them
|
||||
c"-Xclang",
|
||||
c"-fdenormal-fp-math=dynamic",
|
||||
c"-O3",
|
||||
c"-mno-wavefrontsize64",
|
||||
c"-mcumode",
|
||||
// Useful for inlining reports, combined with AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_EMIT_VERBOSE_LOGS=1 AMD_COMGR_REDIRECT_LOGS=stderr
|
||||
// c"-fsave-optimization-record=yaml",
|
||||
]
|
||||
.into_iter();
|
||||
let opt_options = if cfg!(debug_assertions) {
|
||||
//[c"-g", c"-mllvm", c"-print-before-all", c"", c""]
|
||||
[c"-g", c"", c"", c"", c""]
|
||||
} else {
|
||||
[
|
||||
c"-g0",
|
||||
// default inlining threshold times 10
|
||||
c"-mllvm",
|
||||
c"-inline-threshold=2250",
|
||||
c"-mllvm",
|
||||
c"-inlinehint-threshold=3250",
|
||||
]
|
||||
};
|
||||
compile_to_exec.set_options(common_options.chain(opt_options))?;
|
||||
let exec_data_set = do_action(
|
||||
&linked_data_set,
|
||||
&compile_to_exec,
|
||||
amd_comgr_action_kind_t::AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE,
|
||||
)?;
|
||||
let executable =
|
||||
exec_data_set.get_data(amd_comgr_data_kind_t::AMD_COMGR_DATA_KIND_EXECUTABLE, 0)?;
|
||||
executable.copy_content()
|
||||
}
|
||||
|
||||
fn do_action(
|
||||
data_set: &DataSet,
|
||||
action: &ActionInfo,
|
||||
kind: amd_comgr_action_kind_t,
|
||||
) -> Result<DataSet, amd_comgr_status_s> {
|
||||
let result = DataSet::new()?;
|
||||
unsafe { amd_comgr_do_action(kind, action.get(), data_set.get(), result.get()) }?;
|
||||
Ok(result)
|
||||
}
|
14
cuda_base/Cargo.toml
Normal file
14
cuda_base/Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "cuda_base"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
quote = "1.0"
|
||||
syn = { version = "2.0", features = ["full", "visit-mut", "extra-traits"] }
|
||||
proc-macro2 = "1.0"
|
||||
rustc-hash = "1.1.0"
|
||||
|
||||
[lib]
|
||||
proc-macro = true
|
7
cuda_base/build/wrapper.h
Normal file
7
cuda_base/build/wrapper.h
Normal file
@ -0,0 +1,7 @@
|
||||
#define __CUDA_API_VERSION_INTERNAL
|
||||
#include <cuda.h>
|
||||
#include <cudaProfiler.h>
|
||||
#include <cudaGL.h>
|
||||
#include <cudaEGL.h>
|
||||
#include <vdpau/vdpau.h>
|
||||
#include <cudaVDPAU.h>
|
20935
cuda_base/src/cuda.rs
Normal file
20935
cuda_base/src/cuda.rs
Normal file
File diff suppressed because it is too large
Load Diff
233
cuda_base/src/lib.rs
Normal file
233
cuda_base/src/lib.rs
Normal file
@ -0,0 +1,233 @@
|
||||
extern crate proc_macro;
|
||||
|
||||
use proc_macro::TokenStream;
|
||||
use proc_macro2::Span;
|
||||
use quote::{quote, ToTokens};
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::iter;
|
||||
use syn::parse::{Parse, ParseStream};
|
||||
use syn::punctuated::Punctuated;
|
||||
use syn::visit_mut::VisitMut;
|
||||
use syn::{
|
||||
bracketed, parse_macro_input, File, ForeignItem, ForeignItemFn, Ident, Item, Path, Signature,
|
||||
Token,
|
||||
};
|
||||
|
||||
const CUDA_RS: &'static str = include_str! {"cuda.rs"};
|
||||
const NVML_RS: &'static str = include_str! {"nvml.rs"};
|
||||
|
||||
// This macro accepts following arguments:
|
||||
// * `normal_macro`: ident for a normal macro
|
||||
// * zero or more:
|
||||
// * `override_macro`: ident for an override macro
|
||||
// * `override_fns`: list of override functions
|
||||
// Then macro goes through every function in cuda.rs, and for every fn `foo`:
|
||||
// * if `foo` is contained in `override_fns` then pass it into `override_macro`
|
||||
// * if `foo` is not contained in `override_fns` pass it to `normal_macro`
|
||||
// Both `override_macro` and `normal_macro` expect semicolon-separated list:
|
||||
// macro_foo!(
|
||||
// "system" fn cuCtxDetach(ctx: CUcontext) -> CUresult;
|
||||
// "system" fn cuCtxDetach(ctx: CUcontext) -> CUresult
|
||||
// )
|
||||
// Additionally, it does a fixup of CUDA types so they get prefixed with `type_path`
|
||||
#[proc_macro]
|
||||
pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
function_declarations(tokens, CUDA_RS)
|
||||
}
|
||||
|
||||
fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream {
|
||||
let input = parse_macro_input!(tokens as FnDeclInput);
|
||||
let mut cuda_module = syn::parse_str::<File>(module).unwrap();
|
||||
let mut choose_macro = ChooseMacro::new(input);
|
||||
syn::visit_mut::visit_file_mut(&mut FixFnSignatures, &mut cuda_module);
|
||||
let extern_ = if let Item::ForeignMod(extern_) = cuda_module.items.pop().unwrap() {
|
||||
extern_
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let abi = extern_.abi.name;
|
||||
for mut item in extern_.items {
|
||||
if let ForeignItem::Fn(ForeignItemFn {
|
||||
sig: Signature { ref ident, .. },
|
||||
ref mut attrs,
|
||||
..
|
||||
}) = item
|
||||
{
|
||||
*attrs = Vec::new();
|
||||
choose_macro.add(ident, quote! { #abi #item });
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
let mut result = proc_macro2::TokenStream::new();
|
||||
for (path, items) in
|
||||
iter::once(choose_macro.default).chain(choose_macro.override_sets.into_iter())
|
||||
{
|
||||
if items.is_empty() {
|
||||
continue;
|
||||
}
|
||||
quote! {
|
||||
#path ! { #(#items)* }
|
||||
}
|
||||
.to_tokens(&mut result);
|
||||
}
|
||||
result.into()
|
||||
}
|
||||
|
||||
#[proc_macro]
|
||||
pub fn nvml_function_declarations(tokens: TokenStream) -> TokenStream {
|
||||
function_declarations(tokens, NVML_RS)
|
||||
}
|
||||
struct FnDeclInput {
|
||||
normal_macro: Path,
|
||||
overrides: Punctuated<OverrideMacro, Token![,]>,
|
||||
}
|
||||
|
||||
impl Parse for FnDeclInput {
|
||||
fn parse(input: ParseStream) -> syn::Result<Self> {
|
||||
let normal_macro = input.parse::<Path>()?;
|
||||
let overrides = if input.is_empty() {
|
||||
Punctuated::new()
|
||||
} else {
|
||||
input.parse::<Token![,]>()?;
|
||||
input.parse_terminated(OverrideMacro::parse, Token![,])?
|
||||
};
|
||||
Ok(Self {
|
||||
normal_macro,
|
||||
overrides,
|
||||
})
|
||||
}
|
||||
}
|
||||
struct OverrideMacro {
|
||||
macro_: Path,
|
||||
functions: Punctuated<Ident, Token![,]>,
|
||||
}
|
||||
|
||||
impl Parse for OverrideMacro {
|
||||
fn parse(input: ParseStream) -> syn::Result<Self> {
|
||||
let macro_ = input.parse::<Path>()?;
|
||||
input.parse::<Token![<=]>()?;
|
||||
let functions_content;
|
||||
bracketed!(functions_content in input);
|
||||
let functions = functions_content.parse_terminated(Ident::parse, Token![,])?;
|
||||
Ok(Self { macro_, functions })
|
||||
}
|
||||
}
|
||||
|
||||
struct ChooseMacro {
|
||||
default: (Path, Vec<proc_macro2::TokenStream>),
|
||||
override_lookup: FxHashMap<Ident, Path>,
|
||||
override_sets: FxHashMap<Path, Vec<proc_macro2::TokenStream>>,
|
||||
}
|
||||
|
||||
impl ChooseMacro {
|
||||
fn new(input: FnDeclInput) -> Self {
|
||||
let mut override_lookup = FxHashMap::default();
|
||||
let mut override_sets = FxHashMap::default();
|
||||
for OverrideMacro { macro_, functions } in input.overrides {
|
||||
for ident in functions {
|
||||
override_lookup.insert(ident, macro_.clone());
|
||||
override_sets.insert(macro_.clone(), Vec::new());
|
||||
}
|
||||
}
|
||||
Self {
|
||||
default: (input.normal_macro, Vec::new()),
|
||||
override_lookup,
|
||||
override_sets,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, ident: &Ident, tokens: proc_macro2::TokenStream) {
|
||||
match self.override_lookup.get(ident) {
|
||||
Some(override_macro) => {
|
||||
self.override_sets
|
||||
.get_mut(override_macro)
|
||||
.unwrap()
|
||||
.push(tokens);
|
||||
}
|
||||
None => self.default.1.push(tokens),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For some reason prettyplease will append trailing comma *only*
|
||||
// if there are two or more arguments
|
||||
struct FixFnSignatures;
|
||||
|
||||
impl VisitMut for FixFnSignatures {
|
||||
fn visit_signature_mut(&mut self, s: &mut syn::Signature) {
|
||||
s.inputs.pop_punct();
|
||||
}
|
||||
}
|
||||
|
||||
const MODULES: &[&str] = &[
|
||||
"context", "device", "driver", "function", "link", "memory", "module", "pointer",
|
||||
];
|
||||
|
||||
#[proc_macro]
|
||||
pub fn cuda_normalize_fn(tokens: TokenStream) -> TokenStream {
|
||||
let mut path = parse_macro_input!(tokens as syn::Path);
|
||||
let fn_ = path
|
||||
.segments
|
||||
.pop()
|
||||
.unwrap()
|
||||
.into_tuple()
|
||||
.0
|
||||
.ident
|
||||
.to_string();
|
||||
let already_has_module = MODULES.contains(&&*path.segments.last().unwrap().ident.to_string());
|
||||
let segments: Vec<String> = split(&fn_[2..]); // skip "cu"
|
||||
let fn_path = join(segments, !already_has_module);
|
||||
quote! {
|
||||
#path #fn_path
|
||||
}
|
||||
.into()
|
||||
}
|
||||
|
||||
/// Splits a camel-case name into lowercase segments.
///
/// Every ASCII uppercase letter starts a new segment (and is lowercased);
/// every other character is appended to the current segment. For example
/// `"CtxCreate"` becomes `["ctx", "create"]`.
///
/// If the input does not start with an uppercase letter, the leading
/// characters form the first segment instead of causing a panic. An empty
/// input yields an empty vector.
fn split(fn_: &str) -> Vec<String> {
    let mut result: Vec<String> = Vec::new();
    for c in fn_.chars() {
        // Start a new segment on an uppercase letter, or when there is no
        // segment yet to append to.
        if c.is_ascii_uppercase() || result.is_empty() {
            result.push(c.to_ascii_lowercase().to_string());
        } else if let Some(current) = result.last_mut() {
            current.push(c);
        }
    }
    result
}
|
||||
|
||||
fn join(fn_: Vec<String>, find_module: bool) -> Punctuated<Ident, Token![::]> {
|
||||
fn full_form(segment: &str) -> Option<&[&str]> {
|
||||
Some(match segment {
|
||||
"ctx" => &["context"],
|
||||
"func" => &["function"],
|
||||
"mem" => &["memory"],
|
||||
"memcpy" => &["memory", "copy"],
|
||||
"memset" => &["memory", "set"],
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
let mut normalized: Vec<&str> = Vec::new();
|
||||
for segment in fn_.iter() {
|
||||
match full_form(segment) {
|
||||
Some(segments) => normalized.extend(segments.into_iter()),
|
||||
None => normalized.push(&*segment),
|
||||
}
|
||||
}
|
||||
if !find_module {
|
||||
return [Ident::new(&normalized.join("_"), Span::call_site())]
|
||||
.into_iter()
|
||||
.collect();
|
||||
}
|
||||
if !MODULES.contains(&normalized[0]) {
|
||||
let mut globalized = vec!["driver"];
|
||||
globalized.extend(normalized);
|
||||
normalized = globalized;
|
||||
}
|
||||
let (module, path) = normalized.split_first().unwrap();
|
||||
let path = path.join("_");
|
||||
[module, &&*path]
|
||||
.into_iter()
|
||||
.map(|s| Ident::new(s, Span::call_site()))
|
||||
.collect()
|
||||
}
|
7857
cuda_base/src/nvml.rs
Normal file
7857
cuda_base/src/nvml.rs
Normal file
File diff suppressed because it is too large
Load Diff
9
cuda_types/Cargo.toml
Normal file
9
cuda_types/Cargo.toml
Normal file
@ -0,0 +1,9 @@
|
||||
[package]
|
||||
name = "cuda_types"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
cuda_base = { path = "../cuda_base" }
|
||||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
8110
cuda_types/src/cuda.rs
Normal file
8110
cuda_types/src/cuda.rs
Normal file
File diff suppressed because it is too large
Load Diff
2
cuda_types/src/lib.rs
Normal file
2
cuda_types/src/lib.rs
Normal file
@ -0,0 +1,2 @@
|
||||
pub mod cuda;
|
||||
pub mod nvml;
|
4185
cuda_types/src/nvml.rs
Normal file
4185
cuda_types/src/nvml.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/* automatically generated by rust-bindgen 0.56.0 */
|
||||
|
||||
/* automatically generated by rust-bindgen 0.56.0 */
|
||||
|
||||
pub type wchar_t = ::std::os::raw::c_ushort;
|
||||
pub type ULONG = ::std::os::raw::c_ulong;
|
||||
pub type DWORD = ::std::os::raw::c_ulong;
|
||||
@ -701,10 +701,10 @@ pub struct _DETOUR_TRAMPOLINE {
|
||||
pub type PDETOUR_TRAMPOLINE = *mut _DETOUR_TRAMPOLINE;
|
||||
#[doc = " Binary Typedefs."]
|
||||
pub type PF_DETOUR_BINARY_BYWAY_CALLBACK = ::std::option::Option<
|
||||
unsafe extern "C" fn(pContext: PVOID, pszFile: LPCSTR, ppszOutFile: *mut LPCSTR) -> BOOL,
|
||||
unsafe extern "stdcall" fn(pContext: PVOID, pszFile: LPCSTR, ppszOutFile: *mut LPCSTR) -> BOOL,
|
||||
>;
|
||||
pub type PF_DETOUR_BINARY_FILE_CALLBACK = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
unsafe extern "stdcall" fn(
|
||||
pContext: PVOID,
|
||||
pszOrigFile: LPCSTR,
|
||||
pszFile: LPCSTR,
|
||||
@ -712,7 +712,7 @@ pub type PF_DETOUR_BINARY_FILE_CALLBACK = ::std::option::Option<
|
||||
) -> BOOL,
|
||||
>;
|
||||
pub type PF_DETOUR_BINARY_SYMBOL_CALLBACK = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
unsafe extern "stdcall" fn(
|
||||
pContext: PVOID,
|
||||
nOrigOrdinal: ULONG,
|
||||
nOrdinal: ULONG,
|
||||
@ -723,18 +723,18 @@ pub type PF_DETOUR_BINARY_SYMBOL_CALLBACK = ::std::option::Option<
|
||||
) -> BOOL,
|
||||
>;
|
||||
pub type PF_DETOUR_BINARY_COMMIT_CALLBACK =
|
||||
::std::option::Option<unsafe extern "C" fn(pContext: PVOID) -> BOOL>;
|
||||
::std::option::Option<unsafe extern "stdcall" fn(pContext: PVOID) -> BOOL>;
|
||||
pub type PF_DETOUR_ENUMERATE_EXPORT_CALLBACK = ::std::option::Option<
|
||||
unsafe extern "C" fn(pContext: PVOID, nOrdinal: ULONG, pszName: LPCSTR, pCode: PVOID) -> BOOL,
|
||||
unsafe extern "stdcall" fn(pContext: PVOID, nOrdinal: ULONG, pszName: LPCSTR, pCode: PVOID) -> BOOL,
|
||||
>;
|
||||
pub type PF_DETOUR_IMPORT_FILE_CALLBACK = ::std::option::Option<
|
||||
unsafe extern "C" fn(pContext: PVOID, hModule: HMODULE, pszFile: LPCSTR) -> BOOL,
|
||||
unsafe extern "stdcall" fn(pContext: PVOID, hModule: HMODULE, pszFile: LPCSTR) -> BOOL,
|
||||
>;
|
||||
pub type PF_DETOUR_IMPORT_FUNC_CALLBACK = ::std::option::Option<
|
||||
unsafe extern "C" fn(pContext: PVOID, nOrdinal: DWORD, pszFunc: LPCSTR, pvFunc: PVOID) -> BOOL,
|
||||
unsafe extern "stdcall" fn(pContext: PVOID, nOrdinal: DWORD, pszFunc: LPCSTR, pvFunc: PVOID) -> BOOL,
|
||||
>;
|
||||
pub type PF_DETOUR_IMPORT_FUNC_CALLBACK_EX = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
unsafe extern "stdcall" fn(
|
||||
pContext: PVOID,
|
||||
nOrdinal: DWORD,
|
||||
pszFunc: LPCSTR,
|
||||
@ -742,26 +742,26 @@ pub type PF_DETOUR_IMPORT_FUNC_CALLBACK_EX = ::std::option::Option<
|
||||
) -> BOOL,
|
||||
>;
|
||||
pub type PDETOUR_BINARY = *mut ::std::os::raw::c_void;
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
#[doc = " Transaction APIs."]
|
||||
pub fn DetourTransactionBegin() -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourTransactionAbort() -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourTransactionCommit() -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourTransactionCommitEx(pppFailedPointer: *mut *mut PVOID) -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourUpdateThread(hThread: HANDLE) -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourAttach(ppPointer: *mut PVOID, pDetour: PVOID) -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourAttachEx(
|
||||
ppPointer: *mut PVOID,
|
||||
pDetour: PVOID,
|
||||
@ -770,29 +770,29 @@ extern "C" {
|
||||
ppRealDetour: *mut PVOID,
|
||||
) -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourDetach(ppPointer: *mut PVOID, pDetour: PVOID) -> LONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourSetIgnoreTooSmall(fIgnore: BOOL) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourSetRetainRegions(fRetain: BOOL) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourSetSystemRegionLowerBound(pSystemRegionLowerBound: PVOID) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourSetSystemRegionUpperBound(pSystemRegionUpperBound: PVOID) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
#[doc = " Code Functions."]
|
||||
pub fn DetourFindFunction(pszModule: LPCSTR, pszFunction: LPCSTR) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCodeFromPointer(pPointer: PVOID, ppGlobals: *mut PVOID) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCopyInstruction(
|
||||
pDst: PVOID,
|
||||
ppDstPool: *mut PVOID,
|
||||
@ -801,36 +801,36 @@ extern "C" {
|
||||
plExtra: *mut LONG,
|
||||
) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourSetCodeModule(hModule: HMODULE, fLimitReferencesToModule: BOOL) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourAllocateRegionWithinJumpBounds(
|
||||
pbTarget: LPCVOID,
|
||||
pcbAllocatedSize: PDWORD,
|
||||
) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
#[doc = " Loaded Binary Functions."]
|
||||
pub fn DetourGetContainingModule(pvAddr: PVOID) -> HMODULE;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourEnumerateModules(hModuleLast: HMODULE) -> HMODULE;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourGetEntryPoint(hModule: HMODULE) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourGetModuleSize(hModule: HMODULE) -> ULONG;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourEnumerateExports(
|
||||
hModule: HMODULE,
|
||||
pContext: PVOID,
|
||||
pfExport: PF_DETOUR_ENUMERATE_EXPORT_CALLBACK,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourEnumerateImports(
|
||||
hModule: HMODULE,
|
||||
pContext: PVOID,
|
||||
@ -838,7 +838,7 @@ extern "C" {
|
||||
pfImportFunc: PF_DETOUR_IMPORT_FUNC_CALLBACK,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourEnumerateImportsEx(
|
||||
hModule: HMODULE,
|
||||
pContext: PVOID,
|
||||
@ -846,20 +846,20 @@ extern "C" {
|
||||
pfImportFuncEx: PF_DETOUR_IMPORT_FUNC_CALLBACK_EX,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourFindPayload(hModule: HMODULE, rguid: *const GUID, pcbData: *mut DWORD) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourFindPayloadEx(rguid: *const GUID, pcbData: *mut DWORD) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourGetSizeOfPayloads(hModule: HMODULE) -> DWORD;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
#[doc = " Persistent Binary Functions."]
|
||||
pub fn DetourBinaryOpen(hFile: HANDLE) -> PDETOUR_BINARY;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryEnumeratePayloads(
|
||||
pBinary: PDETOUR_BINARY,
|
||||
pGuid: *mut GUID,
|
||||
@ -867,14 +867,14 @@ extern "C" {
|
||||
pnIterator: *mut DWORD,
|
||||
) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryFindPayload(
|
||||
pBinary: PDETOUR_BINARY,
|
||||
rguid: *const GUID,
|
||||
pcbData: *mut DWORD,
|
||||
) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinarySetPayload(
|
||||
pBinary: PDETOUR_BINARY,
|
||||
rguid: *const GUID,
|
||||
@ -882,16 +882,16 @@ extern "C" {
|
||||
cbData: DWORD,
|
||||
) -> PVOID;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryDeletePayload(pBinary: PDETOUR_BINARY, rguid: *const GUID) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryPurgePayloads(pBinary: PDETOUR_BINARY) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryResetImports(pBinary: PDETOUR_BINARY) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryEditImports(
|
||||
pBinary: PDETOUR_BINARY,
|
||||
pContext: PVOID,
|
||||
@ -901,15 +901,15 @@ extern "C" {
|
||||
pfCommit: PF_DETOUR_BINARY_COMMIT_CALLBACK,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryWrite(pBinary: PDETOUR_BINARY, hFile: HANDLE) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourBinaryClose(pBinary: PDETOUR_BINARY) -> BOOL;
|
||||
}
|
||||
#[doc = " Create Process & Load Dll."]
|
||||
pub type PDETOUR_CREATE_PROCESS_ROUTINEA = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
unsafe extern "stdcall" fn(
|
||||
lpApplicationName: LPCSTR,
|
||||
lpCommandLine: LPSTR,
|
||||
lpProcessAttributes: LPSECURITY_ATTRIBUTES,
|
||||
@ -923,7 +923,7 @@ pub type PDETOUR_CREATE_PROCESS_ROUTINEA = ::std::option::Option<
|
||||
) -> BOOL,
|
||||
>;
|
||||
pub type PDETOUR_CREATE_PROCESS_ROUTINEW = ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
unsafe extern "stdcall" fn(
|
||||
lpApplicationName: LPCWSTR,
|
||||
lpCommandLine: LPWSTR,
|
||||
lpProcessAttributes: LPSECURITY_ATTRIBUTES,
|
||||
@ -936,7 +936,7 @@ pub type PDETOUR_CREATE_PROCESS_ROUTINEW = ::std::option::Option<
|
||||
lpProcessInformation: LPPROCESS_INFORMATION,
|
||||
) -> BOOL,
|
||||
>;
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCreateProcessWithDllA(
|
||||
lpApplicationName: LPCSTR,
|
||||
lpCommandLine: LPSTR,
|
||||
@ -952,7 +952,7 @@ extern "C" {
|
||||
pfCreateProcessA: PDETOUR_CREATE_PROCESS_ROUTINEA,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCreateProcessWithDllW(
|
||||
lpApplicationName: LPCWSTR,
|
||||
lpCommandLine: LPWSTR,
|
||||
@ -968,7 +968,7 @@ extern "C" {
|
||||
pfCreateProcessW: PDETOUR_CREATE_PROCESS_ROUTINEW,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCreateProcessWithDllExA(
|
||||
lpApplicationName: LPCSTR,
|
||||
lpCommandLine: LPSTR,
|
||||
@ -984,7 +984,7 @@ extern "C" {
|
||||
pfCreateProcessA: PDETOUR_CREATE_PROCESS_ROUTINEA,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCreateProcessWithDllExW(
|
||||
lpApplicationName: LPCWSTR,
|
||||
lpCommandLine: LPWSTR,
|
||||
@ -1000,7 +1000,7 @@ extern "C" {
|
||||
pfCreateProcessW: PDETOUR_CREATE_PROCESS_ROUTINEW,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCreateProcessWithDllsA(
|
||||
lpApplicationName: LPCSTR,
|
||||
lpCommandLine: LPSTR,
|
||||
@ -1017,7 +1017,7 @@ extern "C" {
|
||||
pfCreateProcessA: PDETOUR_CREATE_PROCESS_ROUTINEA,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCreateProcessWithDllsW(
|
||||
lpApplicationName: LPCWSTR,
|
||||
lpCommandLine: LPWSTR,
|
||||
@ -1034,21 +1034,21 @@ extern "C" {
|
||||
pfCreateProcessW: PDETOUR_CREATE_PROCESS_ROUTINEW,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourProcessViaHelperA(
|
||||
dwTargetPid: DWORD,
|
||||
lpDllName: LPCSTR,
|
||||
pfCreateProcessA: PDETOUR_CREATE_PROCESS_ROUTINEA,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourProcessViaHelperW(
|
||||
dwTargetPid: DWORD,
|
||||
lpDllName: LPCSTR,
|
||||
pfCreateProcessW: PDETOUR_CREATE_PROCESS_ROUTINEW,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourProcessViaHelperDllsA(
|
||||
dwTargetPid: DWORD,
|
||||
nDlls: DWORD,
|
||||
@ -1056,7 +1056,7 @@ extern "C" {
|
||||
pfCreateProcessA: PDETOUR_CREATE_PROCESS_ROUTINEA,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourProcessViaHelperDllsW(
|
||||
dwTargetPid: DWORD,
|
||||
nDlls: DWORD,
|
||||
@ -1064,11 +1064,11 @@ extern "C" {
|
||||
pfCreateProcessW: PDETOUR_CREATE_PROCESS_ROUTINEW,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourUpdateProcessWithDll(hProcess: HANDLE, rlpDlls: *mut LPCSTR, nDlls: DWORD)
|
||||
-> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourUpdateProcessWithDllEx(
|
||||
hProcess: HANDLE,
|
||||
hImage: HMODULE,
|
||||
@ -1077,7 +1077,7 @@ extern "C" {
|
||||
nDlls: DWORD,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourCopyPayloadToProcess(
|
||||
hProcess: HANDLE,
|
||||
rguid: *const GUID,
|
||||
@ -1085,15 +1085,15 @@ extern "C" {
|
||||
cbData: DWORD,
|
||||
) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourRestoreAfterWith() -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourRestoreAfterWithEx(pvData: PVOID, cbData: DWORD) -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourIsHelperProcess() -> BOOL;
|
||||
}
|
||||
extern "C" {
|
||||
extern "stdcall" {
|
||||
pub fn DetourFinishHelperProcess(arg1: HWND, arg2: HINSTANCE, arg3: LPSTR, arg4: INT);
|
||||
}
|
||||
|
8
ext/amd_comgr-sys/Cargo.toml
vendored
Normal file
8
ext/amd_comgr-sys/Cargo.toml
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "amd_comgr-sys"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
links = "amd_comgr"
|
||||
|
||||
[lib]
|
1
ext/amd_comgr-sys/README
vendored
Normal file
1
ext/amd_comgr-sys/README
vendored
Normal file
@ -0,0 +1 @@
|
||||
bindgen --rust-target 1.77 /opt/rocm/include/amd_comgr/amd_comgr.h -o /tmp/amd_comgr.rs --no-layout-tests --default-enum-style=newtype --allowlist-function "amd_comgr.*" --allowlist-type "amd_comgr.*" --no-derive-debug --must-use-type amd_comgr_status_t --allowlist-var "^AMD_COMGR.*$"
|
@ -1,9 +1,9 @@
|
||||
use env::VarError;
|
||||
use std::env::VarError;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
fn main() -> Result<(), VarError> {
|
||||
println!("cargo:rustc-link-lib=dylib=ze_loader");
|
||||
fn main() -> Result<(), VarError> {
|
||||
if cfg!(windows) {
|
||||
println!("cargo:rustc-link-lib=dylib=amd_comgr_2");
|
||||
let env = env::var("CARGO_CFG_TARGET_ENV")?;
|
||||
if env == "msvc" {
|
||||
let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
|
||||
@ -12,6 +12,9 @@ fn main() -> Result<(), VarError> {
|
||||
} else {
|
||||
println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
|
||||
};
|
||||
} else {
|
||||
println!("cargo:rustc-link-lib=dylib=amd_comgr");
|
||||
println!("cargo:rustc-link-search=native=/opt/rocm/lib/");
|
||||
}
|
||||
Ok(())
|
||||
}
|
68
ext/amd_comgr-sys/lib/amd_comgr_2.def
vendored
Normal file
68
ext/amd_comgr-sys/lib/amd_comgr_2.def
vendored
Normal file
@ -0,0 +1,68 @@
|
||||
;
|
||||
; Definition file of amd_comgr_2.dll
|
||||
; Automatic generated by gendef
|
||||
; written by Kai Tietz 2008
|
||||
;
|
||||
LIBRARY "amd_comgr_2.dll"
|
||||
EXPORTS
|
||||
amd_comgr_action_data_count
|
||||
amd_comgr_action_data_get_data
|
||||
amd_comgr_action_info_get_isa_name
|
||||
amd_comgr_action_info_get_language
|
||||
amd_comgr_action_info_get_logging
|
||||
amd_comgr_action_info_get_option_list_count
|
||||
amd_comgr_action_info_get_option_list_item
|
||||
amd_comgr_action_info_get_options
|
||||
amd_comgr_action_info_get_working_directory_path
|
||||
amd_comgr_action_info_set_isa_name
|
||||
amd_comgr_action_info_set_language
|
||||
amd_comgr_action_info_set_logging
|
||||
amd_comgr_action_info_set_option_list
|
||||
amd_comgr_action_info_set_options
|
||||
amd_comgr_action_info_set_working_directory_path
|
||||
amd_comgr_create_action_info
|
||||
amd_comgr_create_data
|
||||
amd_comgr_create_data_set
|
||||
amd_comgr_create_disassembly_info
|
||||
amd_comgr_create_symbolizer_info
|
||||
amd_comgr_data_set_add
|
||||
amd_comgr_data_set_remove
|
||||
amd_comgr_demangle_symbol_name
|
||||
amd_comgr_destroy_action_info
|
||||
amd_comgr_destroy_data_set
|
||||
amd_comgr_destroy_disassembly_info
|
||||
amd_comgr_destroy_metadata
|
||||
amd_comgr_destroy_symbolizer_info
|
||||
amd_comgr_disassemble_instruction
|
||||
amd_comgr_do_action
|
||||
amd_comgr_get_data
|
||||
amd_comgr_get_data_isa_name
|
||||
amd_comgr_get_data_kind
|
||||
amd_comgr_get_data_metadata
|
||||
amd_comgr_get_data_name
|
||||
amd_comgr_get_isa_count
|
||||
amd_comgr_get_isa_metadata
|
||||
amd_comgr_get_isa_name
|
||||
amd_comgr_get_mangled_name
|
||||
amd_comgr_get_metadata_kind
|
||||
amd_comgr_get_metadata_list_size
|
||||
amd_comgr_get_metadata_map_size
|
||||
amd_comgr_get_metadata_string
|
||||
amd_comgr_get_version
|
||||
amd_comgr_index_list_metadata
|
||||
amd_comgr_iterate_map_metadata
|
||||
amd_comgr_iterate_symbols
|
||||
amd_comgr_lookup_code_object
|
||||
amd_comgr_map_elf_virtual_address_to_code_object_offset
|
||||
amd_comgr_map_name_expression_to_symbol_name
|
||||
amd_comgr_metadata_lookup
|
||||
amd_comgr_populate_mangled_names
|
||||
amd_comgr_populate_name_expression_map
|
||||
amd_comgr_release_data
|
||||
amd_comgr_set_data
|
||||
amd_comgr_set_data_from_file_slice
|
||||
amd_comgr_set_data_name
|
||||
amd_comgr_status_string
|
||||
amd_comgr_symbol_get_info
|
||||
amd_comgr_symbol_lookup
|
||||
amd_comgr_symbolize
|
BIN
ext/amd_comgr-sys/lib/amd_comgr_2.lib
vendored
Normal file
BIN
ext/amd_comgr-sys/lib/amd_comgr_2.lib
vendored
Normal file
Binary file not shown.
941
ext/amd_comgr-sys/src/amd_comgr.rs
vendored
Normal file
941
ext/amd_comgr-sys/src/amd_comgr.rs
vendored
Normal file
@ -0,0 +1,941 @@
|
||||
/* automatically generated by rust-bindgen 0.70.1 */
|
||||
|
||||
pub const AMD_COMGR_INTERFACE_VERSION_MAJOR: u32 = 2;
|
||||
pub const AMD_COMGR_INTERFACE_VERSION_MINOR: u32 = 7;
|
||||
impl amd_comgr_status_s {
|
||||
#[doc = " The function has been executed successfully."]
|
||||
pub const AMD_COMGR_STATUS_SUCCESS: amd_comgr_status_s =
|
||||
amd_comgr_status_s(unsafe { ::std::num::NonZeroU32::new_unchecked(0) });
|
||||
}
|
||||
impl amd_comgr_status_s {
|
||||
#[doc = " A generic error has occurred."]
|
||||
pub const AMD_COMGR_STATUS_ERROR: amd_comgr_status_s =
|
||||
amd_comgr_status_s(unsafe { ::std::num::NonZeroU32::new_unchecked(1) });
|
||||
}
|
||||
impl amd_comgr_status_s {
|
||||
#[doc = " One of the actual arguments does not meet a precondition stated\n in the documentation of the corresponding formal argument. This\n includes both invalid Action types, and invalid arguments to\n valid Action types."]
|
||||
pub const AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT: amd_comgr_status_s =
|
||||
amd_comgr_status_s(unsafe { ::std::num::NonZeroU32::new_unchecked(2) });
|
||||
}
|
||||
impl amd_comgr_status_s {
|
||||
#[doc = " Failed to allocate the necessary resources."]
|
||||
pub const AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES: amd_comgr_status_s =
|
||||
amd_comgr_status_s(unsafe { ::std::num::NonZeroU32::new_unchecked(3) });
|
||||
}
|
||||
#[repr(transparent)]
|
||||
#[doc = " @brief Status codes."]
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
|
||||
pub struct amd_comgr_status_s(pub ::std::num::NonZeroU32);
|
||||
type amd_comgr_status_t = Result<(), self::amd_comgr_status_s>;
|
||||
// Size check
|
||||
const _: fn() = || {
|
||||
let _ = std::mem::transmute::<amd_comgr_status_t, u32>;
|
||||
};
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " No high level language."]
|
||||
pub const AMD_COMGR_LANGUAGE_NONE: amd_comgr_language_s = amd_comgr_language_s(0);
|
||||
}
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " OpenCL 1.2."]
|
||||
pub const AMD_COMGR_LANGUAGE_OPENCL_1_2: amd_comgr_language_s = amd_comgr_language_s(1);
|
||||
}
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " OpenCL 2.0."]
|
||||
pub const AMD_COMGR_LANGUAGE_OPENCL_2_0: amd_comgr_language_s = amd_comgr_language_s(2);
|
||||
}
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " AMD Hetrogeneous C++ (HC)."]
|
||||
pub const AMD_COMGR_LANGUAGE_HC: amd_comgr_language_s = amd_comgr_language_s(3);
|
||||
}
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " HIP."]
|
||||
pub const AMD_COMGR_LANGUAGE_HIP: amd_comgr_language_s = amd_comgr_language_s(4);
|
||||
}
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " LLVM IR, either textual (.ll) or bitcode (.bc) format."]
|
||||
pub const AMD_COMGR_LANGUAGE_LLVM_IR: amd_comgr_language_s = amd_comgr_language_s(5);
|
||||
}
|
||||
impl amd_comgr_language_s {
|
||||
#[doc = " Marker for last valid language."]
|
||||
pub const AMD_COMGR_LANGUAGE_LAST: amd_comgr_language_s = amd_comgr_language_s(5);
|
||||
}
|
||||
#[repr(transparent)]
|
||||
#[doc = " @brief The source languages supported by the compiler."]
|
||||
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
|
||||
pub struct amd_comgr_language_s(pub ::std::os::raw::c_uint);
|
||||
#[doc = " @brief The source languages supported by the compiler."]
|
||||
pub use self::amd_comgr_language_s as amd_comgr_language_t;
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Query additional information about a status code.\n\n @param[in] status Status code.\n\n @param[out] status_string A NUL-terminated string that describes\n the error status.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n status is an invalid status code, or @p status_string is NULL."]
|
||||
pub fn amd_comgr_status_string(
|
||||
status: amd_comgr_status_t,
|
||||
status_string: *mut *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " @brief Get the version of the code object manager interface\n supported.\n\n An interface is backwards compatible with an implementation with an\n equal major version, and a greater than or equal minor version.\n\n @param[out] major Major version number.\n\n @param[out] minor Minor version number."]
|
||||
pub fn amd_comgr_get_version(major: *mut usize, minor: *mut usize);
|
||||
}
|
||||
/// @brief The kinds of data supported.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct amd_comgr_data_kind_s(pub ::std::os::raw::c_uint);

/// @brief The kinds of data supported.
pub use self::amd_comgr_data_kind_s as amd_comgr_data_kind_t;

// All data-kind constants live in one inherent impl; the numeric values are
// part of the C ABI and must not be changed.
impl amd_comgr_data_kind_s {
    /// No data is available.
    pub const AMD_COMGR_DATA_KIND_UNDEF: Self = Self(0);

    /// The data is a textual main source.
    pub const AMD_COMGR_DATA_KIND_SOURCE: Self = Self(1);

    /// The data is a textual source that is included in the main source
    /// or other include source.
    pub const AMD_COMGR_DATA_KIND_INCLUDE: Self = Self(2);

    /// The data is a precompiled-header source that is included in the main
    /// source or other include source.
    pub const AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER: Self = Self(3);

    /// The data is a diagnostic output.
    pub const AMD_COMGR_DATA_KIND_DIAGNOSTIC: Self = Self(4);

    /// The data is a textual log output.
    pub const AMD_COMGR_DATA_KIND_LOG: Self = Self(5);

    /// The data is compiler LLVM IR bit code for a specific isa.
    pub const AMD_COMGR_DATA_KIND_BC: Self = Self(6);

    /// The data is a relocatable machine code object for a specific isa.
    pub const AMD_COMGR_DATA_KIND_RELOCATABLE: Self = Self(7);

    /// The data is an executable machine code object for a specific
    /// isa. An executable is the kind of code object that can be loaded
    /// and executed.
    pub const AMD_COMGR_DATA_KIND_EXECUTABLE: Self = Self(8);

    /// The data is a block of bytes.
    pub const AMD_COMGR_DATA_KIND_BYTES: Self = Self(9);

    /// The data is a fat binary (clang-offload-bundler output).
    pub const AMD_COMGR_DATA_KIND_FATBIN: Self = Self(16);

    /// The data is an archive.
    pub const AMD_COMGR_DATA_KIND_AR: Self = Self(17);

    /// The data is a bundled bitcode.
    pub const AMD_COMGR_DATA_KIND_BC_BUNDLE: Self = Self(18);

    /// The data is a bundled archive.
    pub const AMD_COMGR_DATA_KIND_AR_BUNDLE: Self = Self(19);

    /// Marker for last valid data kind.
    pub const AMD_COMGR_DATA_KIND_LAST: Self = Self(19);
}
|
||||
/// @brief A handle to a data object.
///
/// Data objects are used to hold the data which is either an input or
/// output of a code object manager action.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_data_s {
    pub handle: u64,
}

/// @brief A handle to a data object.
///
/// Data objects are used to hold the data which is either an input or
/// output of a code object manager action.
pub type amd_comgr_data_t = amd_comgr_data_s;
|
||||
/// @brief A handle to an action data object.
///
/// An action data object holds a set of data objects. These can be
/// used as inputs to an action, or produced as the result of an
/// action.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_data_set_s {
    pub handle: u64,
}

/// @brief A handle to an action data object.
///
/// An action data object holds a set of data objects. These can be
/// used as inputs to an action, or produced as the result of an
/// action.
pub type amd_comgr_data_set_t = amd_comgr_data_set_s;
|
||||
/// @brief A handle to an action information object.
///
/// An action information object holds all the necessary information,
/// excluding the input data objects, required to perform an action.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_action_info_s {
    pub handle: u64,
}

/// @brief A handle to an action information object.
///
/// An action information object holds all the necessary information,
/// excluding the input data objects, required to perform an action.
pub type amd_comgr_action_info_t = amd_comgr_action_info_s;
|
||||
/// @brief A handle to a metadata node.
///
/// A metadata node handle is used to traverse the metadata associated
/// with a data node.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_metadata_node_s {
    pub handle: u64,
}

/// @brief A handle to a metadata node.
///
/// A metadata node handle is used to traverse the metadata associated
/// with a data node.
pub type amd_comgr_metadata_node_t = amd_comgr_metadata_node_s;
|
||||
/// @brief A handle to a machine code object symbol.
///
/// A symbol handle is used to obtain the properties of symbols of a machine code
/// object. A symbol handle is invalidated when the data object containing the
/// symbol is destroyed.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_symbol_s {
    pub handle: u64,
}

/// @brief A handle to a machine code object symbol.
///
/// A symbol handle is used to obtain the properties of symbols of a machine code
/// object. A symbol handle is invalidated when the data object containing the
/// symbol is destroyed.
pub type amd_comgr_symbol_t = amd_comgr_symbol_s;
|
||||
/// @brief A handle to a disassembly information object.
///
/// A disassembly information object holds all the necessary information,
/// excluding the input data, required to perform disassembly.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_disassembly_info_s {
    pub handle: u64,
}

/// @brief A handle to a disassembly information object.
///
/// A disassembly information object holds all the necessary information,
/// excluding the input data, required to perform disassembly.
pub type amd_comgr_disassembly_info_t = amd_comgr_disassembly_info_s;
|
||||
/// @brief A handle to a symbolizer information object.
///
/// A symbolizer information object holds all the necessary information
/// required to perform symbolization.
#[derive(Clone, Copy)]
#[repr(C)]
pub struct amd_comgr_symbolizer_info_s {
    pub handle: u64,
}

/// @brief A handle to a symbolizer information object.
///
/// A symbolizer information object holds all the necessary information
/// required to perform symbolization.
pub type amd_comgr_symbolizer_info_t = amd_comgr_symbolizer_info_s;
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the number of isa names supported by this version of\n the code object manager library.\n\n The isa name specifies the instruction set architecture that should\n be used in the actions that involve machine code generation or\n inspection.\n\n @param[out] count The number of isa names supported.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n count is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_get_isa_count(count: *mut usize) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the Nth isa name supported by this version of the\n code object manager library.\n\n @param[in] index The index of the isa name to be returned. The\n first isa name is index 0.\n\n @param[out] isa_name A null terminated string that is the isa name\n being requested.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n index is greater than the number of isa name supported by this\n version of the code object manager library. @p isa_name is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_get_isa_name(
|
||||
index: usize,
|
||||
isa_name: *mut *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get a handle to the metadata of an isa name.\n\n The structure of the returned metadata is isa name specific and versioned\n with details specified in\n https://llvm.org/docs/AMDGPUUsage.html#code-object-metadata.\n It can include information about the\n limits for resources such as registers and memory addressing.\n\n @param[in] isa_name The isa name to query.\n\n @param[out] metadata A handle to the metadata of the isa name. If\n the isa name has no metadata then the returned handle has a kind of\n @p AMD_COMGR_METADATA_KIND_NULL. The handle must be destroyed\n using @c amd_comgr_destroy_metadata.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n name is NULL or is not an isa name supported by this version of the\n code object manager library. @p metadata is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_isa_metadata(
|
||||
isa_name: *const ::std::os::raw::c_char,
|
||||
metadata: *mut amd_comgr_metadata_node_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Create a data object that can hold data of a specified kind.\n\n Data objects are reference counted and are destroyed when the\n reference count reaches 0. When a data object is created its\n reference count is 1, it has 0 bytes of data, it has an empty name,\n and it has no metadata.\n\n @param[in] kind The kind of data the object is intended to hold.\n\n @param[out] data A handle to the data object created. Its reference\n count is set to 1.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n kind is an invalid data kind, or @p\n AMD_COMGR_DATA_KIND_UNDEF. @p data is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create the data object as out of resources."]
|
||||
pub fn amd_comgr_create_data(
|
||||
kind: amd_comgr_data_kind_t,
|
||||
data: *mut amd_comgr_data_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Indicate that no longer using a data object handle.\n\n The reference count of the associated data object is\n decremented. If it reaches 0 it is destroyed.\n\n @param[in] data The data object to release.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_release_data(data: amd_comgr_data_t) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the kind of the data object.\n\n @param[in] data The data object to query.\n\n @param[out] kind The kind of data the object.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object. @p kind is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create the data object as out of resources."]
|
||||
pub fn amd_comgr_get_data_kind(
|
||||
data: amd_comgr_data_t,
|
||||
kind: *mut amd_comgr_data_kind_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the data content of a data object to the specified\n bytes.\n\n Any previous value of the data object is overwritten. Any metadata\n associated with the data object is also replaced which invalidates\n all metadata handles to the old metadata.\n\n @param[in] data The data object to update.\n\n @param[in] size The number of bytes in the data specified by @p bytes.\n\n @param[in] bytes The bytes to set the data object to. The bytes are\n copied into the data object and can be freed after the call.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_set_data(
|
||||
data: amd_comgr_data_t,
|
||||
size: usize,
|
||||
bytes: *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief For the given open posix file descriptor, map a slice of the\n file into the data object. The slice is specified by @p offset and @p size.\n Internally this API calls amd_comgr_set_data and resets data object's\n current state.\n\n @param[in, out] data The data object to update.\n\n @param[in] file_descriptor The native file descriptor for an open file.\n The @p file_descriptor must not be passed into a system I/O function\n by any other thread while this function is executing. The offset in\n the file descriptor may be updated based on the requested size and\n underlying platform. The @p file_descriptor may be closed immediately\n after this function returns.\n\n @param[in] offset position relative to the start of the file\n specifying the beginning of the slice in @p file_descriptor.\n\n @param[in] size Size in bytes of the slice.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The operation is successful.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is an invalid or\n the map operation failed."]
|
||||
pub fn amd_comgr_set_data_from_file_slice(
|
||||
data: amd_comgr_data_t,
|
||||
file_descriptor: ::std::os::raw::c_int,
|
||||
offset: u64,
|
||||
size: u64,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the name associated with a data object.\n\n When compiling, the full name of an include directive is used to\n reference the contents of the include data object with the same\n name. The name may also be used for other data objects in log and\n diagnostic output.\n\n @param[in] data The data object to update.\n\n @param[in] name A null terminated string that specifies the name to\n use for the data object. If NULL then the name is set to the empty\n string.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_set_data_name(
|
||||
data: amd_comgr_data_t,
|
||||
name: *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the data contents, and/or the size of the data\n associated with a data object.\n\n @param[in] data The data object to query.\n\n @param[in, out] size On entry, the size of @p bytes. On return, if @p bytes\n is NULL, set to the size of the data object contents.\n\n @param[out] bytes If not NULL, then the first @p size bytes of the\n data object contents is copied. If NULL, no data is copied, and\n only @p size is updated (useful in order to find the size of buffer\n required to copy the data).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_data(
|
||||
data: amd_comgr_data_t,
|
||||
size: *mut usize,
|
||||
bytes: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the data object name and/or name length.\n\n @param[in] data The data object to query.\n\n @param[in, out] size On entry, the size of @p name. On return, the size of\n the data object name including the terminating null character.\n\n @param[out] name If not NULL, then the first @p size characters of the\n data object name are copied. If @p name is NULL, only @p size is updated\n (useful in order to find the size of buffer required to copy the name).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_data_name(
|
||||
data: amd_comgr_data_t,
|
||||
size: *mut usize,
|
||||
name: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the data object isa name and/or isa name length.\n\n @param[in] data The data object to query.\n\n @param[in, out] size On entry, the size of @p isa_name. On return, if @p\n isa_name is NULL, set to the size of the isa name including the terminating\n null character.\n\n @param[out] isa_name If not NULL, then the first @p size characters\n of the isa name are copied. If NULL, no isa name is copied, and\n only @p size is updated (useful in order to find the size of buffer\n required to copy the isa name).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, has kind @p\n AMD_COMGR_DATA_KIND_UNDEF, or is not an isa specific\n kind. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_data_isa_name(
|
||||
data: amd_comgr_data_t,
|
||||
size: *mut usize,
|
||||
isa_name: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Create a symbolizer info object.\n\n @param[in] code_object A data object denoting a code object for which\n symbolization should be performed. The kind of this object must be\n ::AMD_COMGR_DATA_KIND_RELOCATABLE, ::AMD_COMGR_DATA_KIND_EXECUTABLE,\n or ::AMD_COMGR_DATA_KIND_BYTES.\n\n @param[in] print_symbol_callback Function called by a successfull\n symbolize query. @p symbol is a null-terminated string containing the\n symbolization of the address and @p user_data is an arbitary user data.\n The callback does not own @p symbol, and it cannot be referenced once\n the callback returns.\n\n @param[out] symbolizer_info A handle to the symbolizer info object created.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if @p code_object is\n invalid or @p print_symbol_callback is null.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create @p symbolizer_info as out of resources."]
|
||||
pub fn amd_comgr_create_symbolizer_info(
|
||||
code_object: amd_comgr_data_t,
|
||||
print_symbol_callback: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
symbol: *const ::std::os::raw::c_char,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
),
|
||||
>,
|
||||
symbolizer_info: *mut amd_comgr_symbolizer_info_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Destroy symbolizer info object.\n\n @param[in] symbolizer_info A handle to symbolizer info object to destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS on successful execution.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if @p\n symbolizer_info is invalid."]
|
||||
pub fn amd_comgr_destroy_symbolizer_info(
|
||||
symbolizer_info: amd_comgr_symbolizer_info_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Symbolize an address.\n\n The @p address is symbolized using the symbol definitions of the\n @p code_object specified when the @p symbolizer_info was created.\n The @p print_symbol_callback callback function specified when the\n @p symbolizer_info was created is called passing the\n symbolization result as @p symbol and @p user_data value.\n\n If symbolization is not possible ::AMD_COMGR_STATUS_SUCCESS is returned and\n the string passed to the @p symbol argument of the @p print_symbol_callback\n specified when the @p symbolizer_info was created contains the text\n \"<invalid>\" or \"??\". This is consistent with `llvm-symbolizer` utility.\n\n @param[in] symbolizer_info A handle to symbolizer info object which should be\n used to symbolize the @p address.\n\n @param[in] address An unrelocated ELF address to which symbolization\n query should be performed.\n\n @param[in] is_code if true, the symbolizer symbolize the address as code\n and the symbolization result contains filename, function name, line number\n and column number, else the symbolizer symbolize the address as data and\n the symbolizaion result contains symbol name, symbol's starting address\n and symbol size.\n\n @param[in] user_data Arbitrary user-data passed to @p print_symbol_callback\n callback as described for @p symbolizer_info argument.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n symbolizer_info is an invalid data object."]
|
||||
pub fn amd_comgr_symbolize(
|
||||
symbolizer_info: amd_comgr_symbolizer_info_t,
|
||||
address: u64,
|
||||
is_code: bool,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get a handle to the metadata of a data object.\n\n @param[in] data The data object to query.\n\n @param[out] metadata A handle to the metadata of the data\n object. If the data object has no metadata then the returned handle\n has a kind of @p AMD_COMGR_METADATA_KIND_NULL. The\n handle must be destroyed using @c amd_comgr_destroy_metadata.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n data is an invalid data object, or has kind @p\n AMD_COMGR_DATA_KIND_UNDEF. @p metadata is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_data_metadata(
|
||||
data: amd_comgr_data_t,
|
||||
metadata: *mut amd_comgr_metadata_node_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Destroy a metadata handle.\n\n @param[in] metadata A metadata handle to destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p metadata is an invalid\n metadata handle.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update metadata\n handle as out of resources."]
|
||||
pub fn amd_comgr_destroy_metadata(metadata: amd_comgr_metadata_node_t) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Create a data set object.\n\n @param[out] data_set A handle to the data set created. Initially it\n contains no data objects.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to create the data\n set object as out of resources."]
|
||||
pub fn amd_comgr_create_data_set(data_set: *mut amd_comgr_data_set_t) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Destroy a data set object.\n\n The reference counts of any associated data objects are decremented. Any\n handles to the data set object become invalid.\n\n @param[in] data_set A handle to the data set object to destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set\n object as out of resources."]
|
||||
pub fn amd_comgr_destroy_data_set(data_set: amd_comgr_data_set_t) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Add a data object to a data set object if it is not already added.\n\n The reference count of the data object is incremented.\n\n @param[in] data_set A handle to the data set object to be updated.\n\n @param[in] data A handle to the data object to be added. If @p data_set\n already has the specified handle present, then it is not added. The order\n that data objects are added is preserved.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object. @p data is an invalid data object; has undef kind; has\n include kind but does not have a name.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set\n object as out of resources."]
|
||||
pub fn amd_comgr_data_set_add(
|
||||
data_set: amd_comgr_data_set_t,
|
||||
data: amd_comgr_data_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Remove all data objects of a specified kind from a data set object.\n\n The reference count of the removed data objects is decremented.\n\n @param[in] data_set A handle to the data set object to be updated.\n\n @param[in] data_kind The data kind of the data objects to be removed. If @p\n AMD_COMGR_DATA_KIND_UNDEF is specified then all data objects are removed.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object. @p data_kind is an invalid data kind.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update data set\n object as out of resources."]
|
||||
pub fn amd_comgr_data_set_remove(
|
||||
data_set: amd_comgr_data_set_t,
|
||||
data_kind: amd_comgr_data_kind_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the number of data objects of a specified data kind that are\n added to a data set object.\n\n @param[in] data_set A handle to the data set object to be queried.\n\n @param[in] data_kind The data kind of the data objects to be counted.\n\n @param[out] count The number of data objects of data kind @p data_kind.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object. @p data_kind is an invalid data kind or @p\n AMD_COMGR_DATA_KIND_UNDEF. @p count is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to query data set\n object as out of resources."]
|
||||
pub fn amd_comgr_action_data_count(
|
||||
data_set: amd_comgr_data_set_t,
|
||||
data_kind: amd_comgr_data_kind_t,
|
||||
count: *mut usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the Nth data object of a specified data kind that is added to a\n data set object.\n\n The reference count of the returned data object is incremented.\n\n @param[in] data_set A handle to the data set object to be queried.\n\n @param[in] data_kind The data kind of the data object to be returned.\n\n @param[in] index The index of the data object of data kind @data_kind to be\n returned. The first data object is index 0. The order of data objects matches\n the order that they were added to the data set object.\n\n @param[out] data The data object being requested.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data_set is an invalid\n data set object. @p data_kind is an invalid data kind or @p\n AMD_COMGR_DATA_KIND_UNDEF. @p index is greater than the number of data\n objects of kind @p data_kind. @p data is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to query data set\n object as out of resources."]
|
||||
pub fn amd_comgr_action_data_get_data(
|
||||
data_set: amd_comgr_data_set_t,
|
||||
data_kind: amd_comgr_data_kind_t,
|
||||
index: usize,
|
||||
data: *mut amd_comgr_data_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Create an action info object.\n\n @param[out] action_info A handle to the action info object created.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create the action info object as out of resources."]
|
||||
pub fn amd_comgr_create_action_info(
|
||||
action_info: *mut amd_comgr_action_info_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Destroy an action info object.\n\n @param[in] action_info A handle to the action info object to destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_destroy_action_info(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the isa name of an action info object.\n\n When an action info object is created it has no isa name. Some\n actions require that the action info object has an isa name\n defined.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] isa_name A null terminated string that is the isa name. If NULL\n or the empty string then the isa name is cleared. The isa name is defined as\n the Code Object Target Identification string, described at\n https://llvm.org/docs/AMDGPUUsage.html#code-object-target-identification\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p isa_name is not an\n isa name supported by this version of the code object manager\n library.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_action_info_set_isa_name(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
isa_name: *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the isa name and/or isa name length.\n\n @param[in] action_info The action info object to query.\n\n @param[in, out] size On entry, the size of @p isa_name. On return, if @p\n isa_name is NULL, set to the size of the isa name including the terminating\n null character.\n\n @param[out] isa_name If not NULL, then the first @p size characters of the\n isa name are copied into @p isa_name. If the isa name is not set then an\n empty string is copied into @p isa_name. If NULL, no name is copied, and\n only @p size is updated (useful in order to find the size of buffer required\n to copy the name).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_action_info_get_isa_name(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
size: *mut usize,
|
||||
isa_name: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the source language of an action info object.\n\n When an action info object is created it has no language defined\n which is represented by @p\n AMD_COMGR_LANGUAGE_NONE. Some actions require that\n the action info object has a source language defined.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] language The language to set. If @p\n AMD_COMGR_LANGUAGE_NONE then the language is cleared.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p language is an\n invalid language.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_action_info_set_language(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
language: amd_comgr_language_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the language for an action info object.\n\n @param[in] action_info The action info object to query.\n\n @param[out] language The language of the action info opject. @p\n AMD_COMGR_LANGUAGE_NONE if not defined,\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p language is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_action_info_get_language(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
language: *mut amd_comgr_language_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the options string of an action info object.\n\n When an action info object is created it has an empty options string.\n\n This overrides any option strings or arrays previously set by calls to this\n function or @p amd_comgr_action_info_set_option_list.\n\n An @p action_info object which had its options set with this function can\n only have its option inspected with @p amd_comgr_action_info_get_options.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] options A null terminated string that is the options. If\n NULL or the empty string then the options are cleared.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources.\n\n @deprecated since 1.3\n @see amd_comgr_action_info_set_option_list"]
|
||||
pub fn amd_comgr_action_info_set_options(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
options: *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the options string and/or options strings length of an action\n info object.\n\n The @p action_info object must have had its options set with @p\n amd_comgr_action_info_set_options.\n\n @param[in] action_info The action info object to query.\n\n @param[in, out] size On entry, the size of @p options. On return, if @p\n options is NULL, set to the size of the options including the terminating\n null character.\n\n @param[out] options If not NULL, then the first @p size characters of\n the options are copied. If the options are not set then an empty\n string is copied. If NULL, options is not copied, and only @p size\n is updated (useful inorder to find the size of buffer required to\n copy the options).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The options of @p action_info were not set\n with @p amd_comgr_action_info_set_options.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources.\n\n @deprecated since 1.3\n @see amd_comgr_action_info_get_option_list_count and\n amd_comgr_action_info_get_option_list_item"]
|
||||
pub fn amd_comgr_action_info_get_options(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
size: *mut usize,
|
||||
options: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the options array of an action info object.\n\n This overrides any option strings or arrays previously set by calls to this\n function or @p amd_comgr_action_info_set_options.\n\n An @p action_info object which had its options set with this function can\n only have its option inspected with @p\n amd_comgr_action_info_get_option_list_count and @p\n amd_comgr_action_info_get_option_list_item.\n\n @param[in] action_info A handle to the action info object to be updated.\n\n @param[in] options An array of null terminated strings. May be NULL if @p\n count is zero, which will result in an empty options array.\n\n @param[in] count The number of null terminated strings in @p options.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p action_info is an\n invalid action info object, or @p options is NULL and @p count is non-zero.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to update action\n info object as out of resources."]
|
||||
pub fn amd_comgr_action_info_set_option_list(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
options: *mut *const ::std::os::raw::c_char,
|
||||
count: usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the number of options in the options array.\n\n The @p action_info object must have had its options set with @p\n amd_comgr_action_info_set_option_list.\n\n @param[in] action_info The action info object to query.\n\n @param[out] count The number of options in the options array.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The options of @p action_info were never\n set, or not set with @p amd_comgr_action_info_set_option_list.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p action_info is an\n invalid action info object, or @p count is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to query the data\n object as out of resources."]
|
||||
pub fn amd_comgr_action_info_get_option_list_count(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
count: *mut usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the Nth option string in the options array and/or that\n option's length.\n\n The @p action_info object must have had its options set with @p\n amd_comgr_action_info_set_option_list.\n\n @param[in] action_info The action info object to query.\n\n @param[in] index The index of the option to be returned. The first option\n index is 0. The order is the same as the options when they were added in @p\n amd_comgr_action_info_set_options.\n\n @param[in, out] size On entry, the size of @p option. On return, if @option\n is NULL, set to the size of the Nth option string including the terminating\n null character.\n\n @param[out] option If not NULL, then the first @p size characters of the Nth\n option string are copied into @p option. If NULL, no option string is\n copied, and only @p size is updated (useful in order to find the size of\n buffer required to copy the option string).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The options of @p action_info were never\n set, or not set with @p amd_comgr_action_info_set_option_list.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p action_info is an\n invalid action info object, @p index is invalid, or @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to query the data\n object as out of resources."]
|
||||
pub fn amd_comgr_action_info_get_option_list_item(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
index: usize,
|
||||
size: *mut usize,
|
||||
option: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set the working directory of an action info object.\n\n When an action info object is created it has an empty working\n directory. Some actions use the working directory to resolve\n relative file paths.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] path A null terminated string that is the working\n directory path. If NULL or the empty string then the working\n directory is cleared.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_action_info_set_working_directory_path(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
path: *const ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the working directory path and/or working directory path\n length of an action info object.\n\n @param[in] action_info The action info object to query.\n\n @param[in, out] size On entry, the size of @p path. On return, if @p path is\n NULL, set to the size of the working directory path including the\n terminating null character.\n\n @param[out] path If not NULL, then the first @p size characters of\n the working directory path is copied. If the working directory path\n is not set then an empty string is copied. If NULL, the working\n directory path is not copied, and only @p size is updated (useful\n in order to find the size of buffer required to copy the working\n directory path).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_action_info_get_working_directory_path(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
size: *mut usize,
|
||||
path: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Set whether logging is enabled for an action info object.\n\n @param[in] action_info A handle to the action info object to be\n updated.\n\n @param[in] logging Whether logging should be enabled or disable.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action info object as out of resources."]
|
||||
pub fn amd_comgr_action_info_set_logging(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
logging: bool,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get whether logging is enabled for an action info object.\n\n @param[in] action_info The action info object to query.\n\n @param[out] logging Whether logging is enabled.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n action_info is an invalid action info object. @p logging is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_action_info_get_logging(
|
||||
action_info: amd_comgr_action_info_t,
|
||||
logging: *mut bool,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Preprocess each source data object in @p input in order. For each\n successful preprocessor invocation, add a source data object to @p result.\n Resolve any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the working\n directory path in @p info. Preprocess the source for the language in @p\n info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any preprocessing fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
|
||||
pub const AMD_COMGR_ACTION_SOURCE_TO_PREPROCESSOR: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(0);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Copy all existing data objects in @p input to @p output, then add the\n device-specific and language-specific precompiled headers required for\n compilation.\n\n Currently the only supported languages are @p AMD_COMGR_LANGUAGE_OPENCL_1_2\n and @p AMD_COMGR_LANGUAGE_OPENCL_2_0.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name or language\n is not set in @p info, or the language is not supported."]
|
||||
pub const AMD_COMGR_ACTION_ADD_PRECOMPILED_HEADERS: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(1);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Compile each source data object in @p input in order. For each\n successful compilation add a bc data object to @p result. Resolve\n any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce bc for isa name in @p\n info. Compile the source for the language in @p info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
|
||||
pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_BC: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(2);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Copy all existing data objects in @p input to @p output, then add the\n device-specific and language-specific bitcode libraries required for\n compilation.\n\n Currently the only supported languages are @p AMD_COMGR_LANGUAGE_OPENCL_1_2,\n @p AMD_COMGR_LANGUAGE_OPENCL_2_0, and @p AMD_COMGR_LANGUAGE_HIP.\n\n The options in @p info should be set to a set of language-specific flags.\n For OpenCL and HIP these include:\n\n correctly_rounded_sqrt\n daz_opt\n finite_only\n unsafe_math\n wavefrontsize64\n\n For example, to enable daz_opt and unsafe_math, the options should be set\n as:\n\n const char *options[] = {\"daz_opt, \"unsafe_math\"};\n size_t optionsCount = sizeof(options) / sizeof(options[0]);\n amd_comgr_action_info_set_option_list(info, options, optionsCount);\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name or language\n is not set in @p info, the language is not supported, an unknown\n language-specific flag is supplied, or a language-specific flag is\n repeated.\n\n @deprecated since 1.7\n @warning This action, followed by @c AMD_COMGR_ACTION_LINK_BC_TO_BC, may\n result in subtle bugs due to incorrect linking of the device libraries.\n The @c AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC action can\n be used as a workaround which ensures the link occurs correctly."]
|
||||
pub const AMD_COMGR_ACTION_ADD_DEVICE_LIBRARIES: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(3);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Link a collection of bitcodes, bundled bitcodes, and bundled bitcode\n archives in @p into a single composite (unbundled) bitcode @p.\n Any device library bc data object must be explicitly added to @p input if\n needed.\n\n Return @p AMD_COMGR_STATUS_ERROR if the link or unbundling fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if IsaName is not set in @p info and does not match the isa name\n of all bc data objects in @p input, or if the Name field is not set for\n any DataObject in the input set."]
|
||||
pub const AMD_COMGR_ACTION_LINK_BC_TO_BC: amd_comgr_action_kind_s = amd_comgr_action_kind_s(4);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Optimize each bc data object in @p input and create an optimized bc data\n object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if the optimization fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all bc data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_OPTIMIZE_BC_TO_BC: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(5);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Perform code generation for each bc data object in @p input in\n order. For each successful code generation add a relocatable data\n object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any code\n generation fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all bc data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(6);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Perform code generation for each bc data object in @p input in\n order. For each successful code generation add an assembly source data\n object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any code\n generation fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all bc data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(7);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Link each relocatable data object in @p input together and add\n the linked relocatable data object to @p result. Any device\n library relocatable data object must be explicitly added to @p\n input if needed.\n\n Return @p AMD_COMGR_STATUS_ERROR if the link fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_RELOCATABLE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(8);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Link each relocatable data object in @p input together and add\n the linked executable data object to @p result. Any device\n library relocatable data object must be explicitly added to @p\n input if needed.\n\n Return @p AMD_COMGR_STATUS_ERROR if the link fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(9);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Assemble each source data object in @p input in order into machine code.\n For each successful assembly add a relocatable data object to @p result.\n Resolve any include source names using the names of include data objects in\n @p input. Resolve any include relative path names using the working\n directory path in @p info. Produce relocatable for isa name in @p info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any assembly fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if isa name is not set in\n @p info."]
|
||||
pub const AMD_COMGR_ACTION_ASSEMBLE_SOURCE_TO_RELOCATABLE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(10);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Disassemble each relocatable data object in @p input in\n order. For each successful disassembly add a source data object to\n @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any disassembly\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_DISASSEMBLE_RELOCATABLE_TO_SOURCE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(11);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Disassemble each executable data object in @p input in order. For\n each successful disassembly add a source data object to @p result.\n\n Return @p AMD_COMGR_STATUS_ERROR if any disassembly\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info and does not match the isa name\n of all relocatable data objects in @p input."]
|
||||
pub const AMD_COMGR_ACTION_DISASSEMBLE_EXECUTABLE_TO_SOURCE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(12);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Disassemble each bytes data object in @p input in order. For each\n successful disassembly add a source data object to @p\n result. Only simple assembly language commands are generate that\n corresponf to raw bytes are supported, not any directives that\n control the code object layout, or symbolic branch targets or\n names.\n\n Return @p AMD_COMGR_STATUS_ERROR if any disassembly\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name is not set in @p info"]
|
||||
pub const AMD_COMGR_ACTION_DISASSEMBLE_BYTES_TO_SOURCE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(13);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Compile each source data object in @p input in order. For each\n successful compilation add a fat binary to @p result. Resolve\n any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce fat binary for isa name in @p\n info. Compile the source for the language in @p info.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info.\n\n @deprecated since 2.5\n @see in-process compilation via AMD_COMGR_ACTION_COMPILE_SOURCE_TO_BC, etc.\n insteaad"]
|
||||
pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_FATBIN: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(14);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Compile each source data object in @p input in order. For each\n successful compilation add a bc data object to @p result. Resolve\n any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce bc for isa name in @p\n info. Compile the source for the language in @p info. Link against\n the device-specific and language-specific bitcode device libraries\n required for compilation.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
|
||||
pub const AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(15);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Compile a single source data object in @p input in order. For each\n successful compilation add a relocatable data object to @p result.\n Resolve any include source names using the names of include data objects\n in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce relocatable for hip name in @p\n info. Compile the source for the language in @p info. Link against\n the device-specific and language-specific bitcode device libraries\n required for compilation. Currently only supports HIP language.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
|
||||
pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_RELOCATABLE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(16);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Compile each source data object in @p input and create a single executabele\n in @p result. Resolve any include source names using the names of include\n data objects in @p input. Resolve any include relative path names using the\n working directory path in @p info. Produce executable for isa name in @p\n info. Compile the source for the language in @p info. Link against\n the device-specific and language-specific bitcode device libraries\n required for compilation.\n\n Return @p AMD_COMGR_STATUS_ERROR if any compilation\n fails.\n\n Return @p AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT\n if isa name or language is not set in @p info."]
|
||||
pub const AMD_COMGR_ACTION_COMPILE_SOURCE_TO_EXECUTABLE: amd_comgr_action_kind_s =
|
||||
amd_comgr_action_kind_s(17);
|
||||
}
|
||||
impl amd_comgr_action_kind_s {
|
||||
#[doc = " Marker for last valid action kind."]
|
||||
pub const AMD_COMGR_ACTION_LAST: amd_comgr_action_kind_s = amd_comgr_action_kind_s(17);
|
||||
}
|
||||
/// @brief The kinds of actions that can be performed.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct amd_comgr_action_kind_s(pub ::std::os::raw::c_uint);
|
||||
#[doc = " @brief The kinds of actions that can be performed."]
|
||||
pub use self::amd_comgr_action_kind_s as amd_comgr_action_kind_t;
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Perform an action.\n\n Each action ignores any data objects in @p input that it does not\n use. If logging is enabled in @info then @p result will have a log\n data object added. Any diagnostic data objects produced by the\n action will be added to @p result. See the description of each\n action in @p amd_comgr_action_kind_t.\n\n @param[in] kind The action to perform.\n\n @param[in] info The action info to use when performing the action.\n\n @param[in] input The input data objects to the @p kind action.\n\n @param[out] result Any data objects are removed before performing\n the action which then adds all data objects produced by the action.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR An error was\n reported when executing the action.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n kind is an invalid action kind. @p input_data or @p result_data are\n invalid action data object handles. See the description of each\n action in @p amd_comgr_action_kind_t for other\n conditions that result in this status.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_do_action(
|
||||
kind: amd_comgr_action_kind_t,
|
||||
info: amd_comgr_action_info_t,
|
||||
input: amd_comgr_data_set_t,
|
||||
result: amd_comgr_data_set_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
/// @brief The kinds of metadata nodes.
#[repr(transparent)]
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
pub struct amd_comgr_metadata_kind_s(pub std::os::raw::c_uint);

impl amd_comgr_metadata_kind_s {
    /// The NULL metadata handle.
    pub const AMD_COMGR_METADATA_KIND_NULL: Self = Self(0);
    /// A string value.
    pub const AMD_COMGR_METADATA_KIND_STRING: Self = Self(1);
    /// A map that consists of a set of key and value pairs.
    pub const AMD_COMGR_METADATA_KIND_MAP: Self = Self(2);
    /// A list that consists of a sequence of values.
    pub const AMD_COMGR_METADATA_KIND_LIST: Self = Self(3);
    /// Marker for last valid metadata kind (same value as the list kind).
    pub const AMD_COMGR_METADATA_KIND_LAST: Self = Self(3);
}

/// @brief The kinds of metadata nodes.
pub use self::amd_comgr_metadata_kind_s as amd_comgr_metadata_kind_t;
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the kind of the metadata node.\n\n @param[in] metadata The metadata node to query.\n\n @param[out] kind The kind of the metadata node.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node. @p kind is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to create the data object as out of resources."]
|
||||
pub fn amd_comgr_get_metadata_kind(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
kind: *mut amd_comgr_metadata_kind_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the string and/or string length from a metadata string\n node.\n\n @param[in] metadata The metadata node to query.\n\n @param[in, out] size On entry, the size of @p string. On return, if @p\n string is NULL, set to the size of the string including the terminating null\n character.\n\n @param[out] string If not NULL, then the first @p size characters\n of the string are copied. If NULL, no string is copied, and only @p\n size is updated (useful in order to find the size of buffer required\n to copy the string).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node, or does not have kind @p\n AMD_COMGR_METADATA_KIND_STRING. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_metadata_string(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
size: *mut usize,
|
||||
string: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the map size from a metadata map node.\n\n @param[in] metadata The metadata node to query.\n\n @param[out] size The number of entries in the map.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node, or not of kind @p\n AMD_COMGR_METADATA_KIND_MAP. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_metadata_map_size(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
size: *mut usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Iterate over the elements a metadata map node.\n\n @warning The metadata nodes which are passed to the callback are not owned\n by the callback, and are freed just after the callback returns. The callback\n must not save any references to its parameters between iterations.\n\n @param[in] metadata The metadata node to query.\n\n @param[in] callback The function to call for each entry in the map. The\n entry's key is passed in @p key, the entry's value is passed in @p value, and\n @p user_data is passed as @p user_data. If the function returns with a status\n other than @p AMD_COMGR_STATUS_SUCCESS then iteration is stopped.\n\n @param[in] user_data The value to pass to each invocation of @p\n callback. Allows context to be passed into the call back function.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR An error was\n reported by @p callback.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node, or not of kind @p\n AMD_COMGR_METADATA_KIND_MAP. @p callback is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to iterate the metadata as out of resources."]
|
||||
pub fn amd_comgr_iterate_map_metadata(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
callback: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
key: amd_comgr_metadata_node_t,
|
||||
value: amd_comgr_metadata_node_t,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
) -> amd_comgr_status_t,
|
||||
>,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Use a string key to lookup an element of a metadata map\n node and return the entry value.\n\n @param[in] metadata The metadata node to query.\n\n @param[in] key A null terminated string that is the key to lookup.\n\n @param[out] value The metadata node of the @p key element of the\n @p metadata map metadata node. The handle must be destroyed\n using @c amd_comgr_destroy_metadata.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The map has no entry\n with a string key with the value @p key.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node, or not of kind @p\n AMD_COMGR_METADATA_KIND_MAP. @p key or @p value is\n NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to lookup metadata as out of resources."]
|
||||
pub fn amd_comgr_metadata_lookup(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
key: *const ::std::os::raw::c_char,
|
||||
value: *mut amd_comgr_metadata_node_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Get the list size from a metadata list node.\n\n @param[in] metadata The metadata node to query.\n\n @param[out] size The number of entries in the list.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node, or does nopt have kind @p\n AMD_COMGR_METADATA_KIND_LIST. @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update the data object as out of resources."]
|
||||
pub fn amd_comgr_get_metadata_list_size(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
size: *mut usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Return the Nth metadata node of a list metadata node.\n\n @param[in] metadata The metadata node to query.\n\n @param[in] index The index being requested. The first list element\n is index 0.\n\n @param[out] value The metadata node of the @p index element of the\n @p metadata list metadata node. The handle must be destroyed\n using @c amd_comgr_destroy_metadata.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p\n metadata is an invalid metadata node or not of kind @p\n AMD_COMGR_METADATA_INFO_LIST. @p index is greater\n than the number of list elements. @p value is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to update action data object as out of resources."]
|
||||
pub fn amd_comgr_index_list_metadata(
|
||||
metadata: amd_comgr_metadata_node_t,
|
||||
index: usize,
|
||||
value: *mut amd_comgr_metadata_node_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Iterate over the symbols of a machine code object.\n\n For a AMD_COMGR_DATA_KIND_RELOCATABLE the symbols in the ELF symtab section\n are iterated. For a AMD_COMGR_DATA_KIND_EXECUTABLE the symbols in the ELF\n dynsymtab are iterated.\n\n @param[in] data The data object to query.\n\n @param[in] callback The function to call for each symbol in the machine code\n data object. The symbol handle is passed in @p symbol and @p user_data is\n passed as @p user_data. If the function returns with a status other than @p\n AMD_COMGR_STATUS_SUCCESS then iteration is stopped.\n\n @param[in] user_data The value to pass to each invocation of @p\n callback. Allows context to be passed into the call back function.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR An error was\n reported by @p callback.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is an invalid data\n object, or not of kind @p AMD_COMGR_DATA_KIND_RELOCATABLE or\n AMD_COMGR_DATA_KIND_EXECUTABLE. @p callback is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to iterate the data object as out of resources."]
|
||||
pub fn amd_comgr_iterate_symbols(
|
||||
data: amd_comgr_data_t,
|
||||
callback: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
symbol: amd_comgr_symbol_t,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
) -> amd_comgr_status_t,
|
||||
>,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Lookup a symbol in a machine code object by name.\n\n For a AMD_COMGR_DATA_KIND_RELOCATABLE the symbols in the ELF symtab section\n are inspected. For a AMD_COMGR_DATA_KIND_EXECUTABLE the symbols in the ELF\n dynsymtab are inspected.\n\n @param[in] data The data object to query.\n\n @param[in] name A null terminated string that is the symbol name to lookup.\n\n @param[out] symbol The symbol with the @p name.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The machine code object has no symbol\n with @p name.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is an invalid data\n object, or not of kind @p AMD_COMGR_DATA_KIND_RELOCATABLE or\n AMD_COMGR_DATA_KIND_EXECUTABLE.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to lookup symbol as out of resources."]
|
||||
pub fn amd_comgr_symbol_lookup(
|
||||
data: amd_comgr_data_t,
|
||||
name: *const ::std::os::raw::c_char,
|
||||
symbol: *mut amd_comgr_symbol_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
/// @brief Machine code object symbol type.
#[repr(transparent)]
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
pub struct amd_comgr_symbol_type_s(pub std::os::raw::c_int);

impl amd_comgr_symbol_type_s {
    /// The symbol's type is unknown.
    ///
    /// The user should not infer any specific type for symbols which return
    /// `AMD_COMGR_SYMBOL_TYPE_UNKNOWN`, and these symbols may return different
    /// types in future releases.
    pub const AMD_COMGR_SYMBOL_TYPE_UNKNOWN: Self = Self(-1);
    /// The symbol's type is not specified.
    pub const AMD_COMGR_SYMBOL_TYPE_NOTYPE: Self = Self(0);
    /// The symbol is associated with a data object, such as a variable, an
    /// array, and so on.
    pub const AMD_COMGR_SYMBOL_TYPE_OBJECT: Self = Self(1);
    /// The symbol is associated with a function or other executable code.
    pub const AMD_COMGR_SYMBOL_TYPE_FUNC: Self = Self(2);
    /// The symbol is associated with a section. Symbol table entries of this
    /// type exist primarily for relocation.
    pub const AMD_COMGR_SYMBOL_TYPE_SECTION: Self = Self(3);
    /// Conventionally, the symbol's name gives the name of the source file
    /// associated with the object file.
    pub const AMD_COMGR_SYMBOL_TYPE_FILE: Self = Self(4);
    /// The symbol labels an uninitialized common block.
    pub const AMD_COMGR_SYMBOL_TYPE_COMMON: Self = Self(5);
    /// The symbol is associated with an AMDGPU Code Object V2 kernel function.
    pub const AMD_COMGR_SYMBOL_TYPE_AMDGPU_HSA_KERNEL: Self = Self(10);
}

/// @brief Machine code object symbol type.
pub use self::amd_comgr_symbol_type_s as amd_comgr_symbol_type_t;
|
||||
/// @brief Machine code object symbol attributes.
#[repr(transparent)]
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
pub struct amd_comgr_symbol_info_s(pub std::os::raw::c_uint);

impl amd_comgr_symbol_info_s {
    /// The length of the symbol name in bytes. Does not include the NUL
    /// terminator. The type of this attribute is uint64_t.
    pub const AMD_COMGR_SYMBOL_INFO_NAME_LENGTH: Self = Self(0);
    /// The name of the symbol. The type of this attribute is character array
    /// with the length equal to the value of the @p
    /// AMD_COMGR_SYMBOL_INFO_NAME_LENGTH attribute plus 1 for a NUL
    /// terminator.
    pub const AMD_COMGR_SYMBOL_INFO_NAME: Self = Self(1);
    /// The kind of the symbol. The type of this attribute is @p
    /// amd_comgr_symbol_type_t.
    pub const AMD_COMGR_SYMBOL_INFO_TYPE: Self = Self(2);
    /// Size of the variable. The value of this attribute is undefined if the
    /// symbol is not a variable. The type of this attribute is uint64_t.
    pub const AMD_COMGR_SYMBOL_INFO_SIZE: Self = Self(3);
    /// Indicates whether the symbol is undefined. The type of this attribute
    /// is bool.
    pub const AMD_COMGR_SYMBOL_INFO_IS_UNDEFINED: Self = Self(4);
    /// The value of the symbol. The type of this attribute is uint64_t.
    pub const AMD_COMGR_SYMBOL_INFO_VALUE: Self = Self(5);
    /// Marker for last valid symbol info (same value as the value attribute).
    pub const AMD_COMGR_SYMBOL_INFO_LAST: Self = Self(5);
}

/// @brief Machine code object symbol attributes.
pub use self::amd_comgr_symbol_info_s as amd_comgr_symbol_info_t;
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Query information about a machine code object symbol.\n\n @param[in] symbol The symbol to query.\n\n @param[in] attribute Attribute to query.\n\n @param[out] value Pointer to an application-allocated buffer where to store\n the value of the attribute. If the buffer passed by the application is not\n large enough to hold the value of attribute, the behavior is undefined. The\n type of value returned is specified by @p amd_comgr_symbol_info_t.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has\n been executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The @p symbol does not have the requested @p\n attribute.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p symbol is an invalid\n symbol. @p attribute is an invalid value. @p value is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES\n Unable to query symbol as out of resources."]
|
||||
pub fn amd_comgr_symbol_get_info(
|
||||
symbol: amd_comgr_symbol_t,
|
||||
attribute: amd_comgr_symbol_info_t,
|
||||
value: *mut ::std::os::raw::c_void,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Create a disassembly info object.\n\n @param[in] isa_name A null terminated string that is the isa name of the\n target to disassemble for. The isa name is defined as the Code Object Target\n Identification string, described at\n https://llvm.org/docs/AMDGPUUsage.html#code-object-target-identification\n\n @param[in] read_memory_callback Function called to request @p size bytes\n from the program address space at @p from be read into @p to. The requested\n @p size is never zero. Returns the number of bytes which could be read, with\n the guarantee that no additional bytes will be available in any subsequent\n call.\n\n @param[in] print_instruction_callback Function called after a successful\n disassembly. @p instruction is a null terminated string containing the\n disassembled instruction. The callback does not own @p instruction, and it\n cannot be referenced once the callback returns.\n\n @param[in] print_address_annotation_callback Function called after @c\n print_instruction_callback returns, once for each instruction operand which\n was resolved to an absolute address. @p address is the absolute address in\n the program address space. It is intended to append a symbolic\n form of the address, perhaps as a comment, after the instruction disassembly\n produced by @c print_instruction_callback.\n\n @param[out] disassembly_info A handle to the disassembly info object\n created.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The disassembly info object was created.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p isa_name is NULL or\n invalid; or @p read_memory_callback, @p print_instruction_callback,\n or @p print_address_annotation_callback is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to create the\n disassembly info object as out of resources."]
|
||||
pub fn amd_comgr_create_disassembly_info(
|
||||
isa_name: *const ::std::os::raw::c_char,
|
||||
read_memory_callback: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
from: u64,
|
||||
to: *mut ::std::os::raw::c_char,
|
||||
size: u64,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
) -> u64,
|
||||
>,
|
||||
print_instruction_callback: ::std::option::Option<
|
||||
unsafe extern "C" fn(
|
||||
instruction: *const ::std::os::raw::c_char,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
),
|
||||
>,
|
||||
print_address_annotation_callback: ::std::option::Option<
|
||||
unsafe extern "C" fn(address: u64, user_data: *mut ::std::os::raw::c_void),
|
||||
>,
|
||||
disassembly_info: *mut amd_comgr_disassembly_info_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Destroy a disassembly info object.\n\n @param[in] disassembly_info A handle to the disassembly info object to\n destroy.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The disassembly info object was\n destroyed.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p disassembly_info is an\n invalid disassembly info object.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to destroy the\n disassembly info object as out of resources."]
|
||||
pub fn amd_comgr_destroy_disassembly_info(
|
||||
disassembly_info: amd_comgr_disassembly_info_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Disassemble a single instruction.\n\n @param[in] address The address of the first byte of the instruction in the\n program address space.\n\n @param[in] user_data Arbitrary user-data passed to each callback function\n during disassembly.\n\n @param[out] size The number of bytes consumed to decode the\n instruction, or consumed while failing to decode an invalid instruction.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The disassembly was successful.\n\n @retval ::AMD_COMGR_STATUS_ERROR The disassembly failed.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p disassembly_info is\n invalid or @p size is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Unable to disassemble the\n instruction as out of resources."]
|
||||
pub fn amd_comgr_disassemble_instruction(
|
||||
disassembly_info: amd_comgr_disassembly_info_t,
|
||||
address: u64,
|
||||
user_data: *mut ::std::os::raw::c_void,
|
||||
size: *mut u64,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Demangle a symbol name.\n\n @param[in] mangled_symbol_name A data object of kind @p\n AMD_COMGR_DATA_KIND_BYTES containing the mangled symbol name.\n\n @param[out] demangled_symbol_name A handle to the data object of kind @p\n AMD_COMGR_DATA_KIND_BYTES created and set to contain the demangled symbol\n name in case of successful completion. The handle must be released using\n @c amd_comgr_release_data. @p demangled_symbol_name is not updated for\n an error case.\n\n @note If the @p mangled_symbol_name cannot be demangled, it will be copied\n without changes to the @p demangled_symbol_name and AMD_COMGR_STATUS_SUCCESS\n is returned.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p mangled_symbol_name is\n an invalid data object or not of kind @p AMD_COMGR_DATA_KIND_BYTES or\n @p demangled_symbol_name is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_OUT_OF_RESOURCES Out of resources."]
|
||||
pub fn amd_comgr_demangle_symbol_name(
|
||||
mangled_symbol_name: amd_comgr_data_t,
|
||||
demangled_symbol_name: *mut amd_comgr_data_t,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Fetch mangled symbol names from a code object.\n\n @param[in] data A data object of kind @p\n AMD_COMGR_DATA_KIND_EXECUTABLE or @p AMD_COMGR_DATA_KIND_BC\n\n @param[out] count The number of mangled names retrieved. This value\n can be used as an upper bound to the Index provided to the corresponding\n amd_comgr_get_mangled_name() call.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is\n an invalid data object or not of kind @p AMD_COMGR_DATA_KIND_EXECUTABLE or\n @p AMD_COMGR_DATA_KIND_BC.\n"]
|
||||
pub fn amd_comgr_populate_mangled_names(
|
||||
data: amd_comgr_data_t,
|
||||
count: *mut usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Fetch the Nth specific mangled name from a set of populated names or\n that name's length.\n\n The @p data must have had its mangled names populated with @p\n amd_comgr_populate_mangled_names.\n\n @param[in] data A data object of kind @p\n AMD_COMGR_DATA_KIND_EXECUTABLE or @p AMD_COMGR_DATA_KIND_BC used to\n identify which set of mangled names to retrive from.\n\n @param[in] index The index of the mangled name to be returned.\n\n @param[in, out] size For out, the size of @p mangled_name. For in,\n if @mangled_name is NULL, set to the size of the Nth option string including\n the terminating null character.\n\n @param[out] mangled_name If not NULL, then the first @p size characters of\n the Nth mangled name string are copied into @p mangled_name. If NULL, no\n mangled name string is copied, and only @p size is updated (useful in order\n to find the size of the buffer requried to copy the mangled_name string).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR @p data has not been used to\n populate a set of mangled names, or index is greater than the count of\n mangled names for that data object\n"]
|
||||
pub fn amd_comgr_get_mangled_name(
|
||||
data: amd_comgr_data_t,
|
||||
index: usize,
|
||||
size: *mut usize,
|
||||
mangled_name: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Populate a name expression map from a given code object.\n\n Used to map stub names *__amdgcn_name_expr_* in bitcodes and code\n objects generated by hip runtime to an associated (unmangled) name\n expression and (mangled) symbol name.\n\n @param[in] data A data object of kind @p\n AMD_COMGR_DATA_KIND_EXECUTABLE or @p AMD_COMGR_DATA_KIND_BC\n\n @param[out] count The number of name expressions mapped. This value\n can be used as an upper bound to the Index provided to the corresponding\n amd_comgr_map_name_expression_to_symbol_name() call.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is\n an invalid data object or not of kind @p AMD_COMGR_DATA_KIND_EXECUTABLE or\n @p AMD_COMGR_DATA_KIND_BC.\n\n @retval ::AMD_COMGR_STATUS_ERROR LLVM API failure, which should be\n accompanied by an LLVM error message to stderr\n"]
|
||||
pub fn amd_comgr_populate_name_expression_map(
|
||||
data: amd_comgr_data_t,
|
||||
count: *mut usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @brief Fetch a related symbol name for a given name expression;\n or that name's length.\n\n The @p data must have had its name expression map populated with @p\n amd_comgr_populate_name_expression_map.\n\n @param[in] data A data object of kind @p\n AMD_COMGR_DATA_KIND_EXECUTABLE or @p AMD_COMGR_DATA_KIND_BC used to\n identify which map of name expressions to retrieve from.\n\n @param[in, out] size For out, the size of @p symbol_name. For in,\n if @symbol_name is NULL, set to the size of the Nth option string including\n the terminating null character.\n\n @param[in] name_expression A character array of a name expression. This name\n is used as the key to the name expression map in order to locate the desired\n @symbol_name.\n\n @param[out] symbol_name If not NULL, then the first @p size characters of\n the symbol name string mapped from @name_expression are copied into @p\n symbol_name. If NULL, no symbol name string is copied, and only @p size is\n updated (useful in order to find the size of the buffer required to copy the\n symbol_name string).\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function executed successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR @p data object is not valid (NULL or not of\n type bitcode or code object)\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p name_expression is not\n present in the name expression map.\n"]
|
||||
pub fn amd_comgr_map_name_expression_to_symbol_name(
|
||||
data: amd_comgr_data_t,
|
||||
size: *mut usize,
|
||||
name_expression: *mut ::std::os::raw::c_char,
|
||||
symbol_name: *mut ::std::os::raw::c_char,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
/// @brief A data structure for Code object information.
#[derive(Copy, Clone)]
#[repr(C)]
pub struct code_object_info_s {
    /// ISA name representing the code object.
    pub isa: *const std::os::raw::c_char,
    /// The size of the code object.
    pub size: usize,
    // NOTE(review): this field carries no upstream documentation. The docs of
    // `amd_comgr_lookup_code_object` say the entry "is updated with offset and
    // size of the code object"; presumably this is a byte offset into the
    // bundle data -- confirm against the COMGR header.
    pub offset: u64,
}

/// @brief A data structure for Code object information.
pub type amd_comgr_code_object_info_t = code_object_info_s;
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @ brief Given a bundled code object and list of target id strings, extract\n correponding code object information.\n\n @param[in] data The data object for bundled code object. This should be\n of kind AMD_COMGR_DATA_KIND_FATBIN or AMD_COMGR_DATA_KIND_EXECUTABLE or\n AMD_COMGR_DATA_KIND_BYTES. The API interprets the data object of kind\n AMD_COMGR_DATA_KIND_FATBIN as a clang offload bundle and of kind\n AMD_COMGR_DATA_KIND_EXECUTABLE as an executable shared object. For a data\n object of type AMD_COMGR_DATA_KIND_BYTES the API first inspects the data\n passed to determine if it is a fatbin or an executable and performs\n the lookup.\n\n @param[in, out] info_list A list of code object information structure\n initialized with null terminated target id strings. If the target id\n is matched in the code object bundle the corresponding code object\n information is updated with offset and size of the code object. If the\n target id is not found the offset and size are set to 0.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The code object bundle header is incorrect\n or reading bundle entries failed.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is not of\n kind AMD_COMGR_DATA_KIND_FATBIN, or AMD_COMGR_DATA_KIND_BYTES or\n AMD_COMGR_DATA_KIND_EXECUTABLE or either @p info_list is NULL.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT if the @p data has\n invalid data."]
|
||||
pub fn amd_comgr_lookup_code_object(
|
||||
data: amd_comgr_data_t,
|
||||
info_list: *mut amd_comgr_code_object_info_t,
|
||||
info_list_size: usize,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
||||
extern "C" {
|
||||
#[must_use]
|
||||
#[doc = " @ brief Given a code object and an ELF virtual address, map the ELF virtual\n address to a code object offset. Also, determine if the ELF virtual address\n maps to an offset in a data region that is defined by the ELF file, but that\n does not occupy bytes in the ELF file. This is typically true of offsets that\n that refer to runtime or heap allocated memory. For ELF files with defined\n sections, these data regions are referred to as NOBITS or .bss sections.\n\n @param[in] data The data object to be inspected for the given ELF virtual\n address. This should be of kind AMD_COMGR_DATA_KIND_EXECUTABLE.\n\n @param[in] elf_virtual_address The address used to calculate the code object\n offset.\n\n @param[out] code_object_offset The code object offset returned to the caller\n based on the given ELF virtual address.\n\n @param[out] slice_size For nobits regions: the size in bytes, starting from\n the provided virtual address up to the end of the segment. In this case, the\n slice size represents the number of contiguous unreadable addresses following\n the provided address.\n\n For bits regions: the size in bytes, starting from the provided virtual\n address up to either the end of the segment, or the start of a NOBITS region.\n In this case, slice size represents the number of contiguous readable\n addresses following the provided address.\n\n @param[out] nobits Set to true if the code object offset points to a location\n in a data region that does not occupy bytes in the ELF file, as described\n above.\n\n @retval ::AMD_COMGR_STATUS_SUCCESS The function has been executed\n successfully.\n\n @retval ::AMD_COMGR_STATUS_ERROR The provided code object has an invalid\n header due to a mismatch in magic, class, data, version, abi, type, or\n machine.\n\n @retval ::AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT @p data is not of\n kind AMD_COMGR_DATA_KIND_EXECUTABLE or invalid, or that the provided @p\n elf_virtual_address is not within the ranges covered by the 
object's\n load-type program headers."]
|
||||
pub fn amd_comgr_map_elf_virtual_address_to_code_object_offset(
|
||||
data: amd_comgr_data_t,
|
||||
elf_virtual_address: u64,
|
||||
code_object_offset: *mut u64,
|
||||
slice_size: *mut u64,
|
||||
nobits: *mut bool,
|
||||
) -> amd_comgr_status_t;
|
||||
}
|
3
ext/amd_comgr-sys/src/lib.rs
vendored
Normal file
3
ext/amd_comgr-sys/src/lib.rs
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
#![allow(warnings)]
|
||||
pub mod amd_comgr;
|
||||
pub use amd_comgr::*;
|
8
ext/hip_runtime-sys/Cargo.toml
vendored
Normal file
8
ext/hip_runtime-sys/Cargo.toml
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
[package]
|
||||
name = "hip_runtime-sys"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2021"
|
||||
links = "amdhip"
|
||||
|
||||
[lib]
|
40
zluda/build.rs → ext/hip_runtime-sys/build.rs
vendored
40
zluda/build.rs → ext/hip_runtime-sys/build.rs
vendored
@ -1,20 +1,20 @@
|
||||
use env::VarError;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
// HACK ALERT
|
||||
// This is a temporary hack to make sure that the linker does not pick up
|
||||
// NVIDIA OpenCL .lib using paths injected by cl-sys
|
||||
|
||||
/// Build-script entry point: for Windows targets, adds a native library
/// search path for the linker.
///
/// * MSVC target environment: `<CARGO_MANIFEST_DIR>/lib`.
/// * Any other Windows target environment: `C:\Windows\System32`.
///
/// Non-Windows targets emit no directives.
///
/// # Errors
///
/// Returns the `VarError` produced when `CARGO_CFG_TARGET_ENV` or
/// `CARGO_MANIFEST_DIR` cannot be read.
fn main() -> Result<(), VarError> {
    // A build script is compiled for the host, so `cfg!(windows)` would
    // describe the machine running the build. Cargo exposes the *target*
    // configuration through `CARGO_CFG_*` variables, which is what the link
    // directives must follow when cross-compiling.
    let targets_windows = env::var_os("CARGO_CFG_WINDOWS").is_some();
    if targets_windows {
        let target_env = env::var("CARGO_CFG_TARGET_ENV")?;
        if target_env == "msvc" {
            let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
            path.push("lib");
            println!("cargo:rustc-link-search=native={}", path.display());
        } else {
            println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
        }
    }
    Ok(())
}
|
||||
use std::env::VarError;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
/// Build-script entry point: emits the Cargo link directives for the HIP
/// runtime library.
///
/// * Windows targets link `amdhip64_6` and search either
///   `<CARGO_MANIFEST_DIR>/lib` (MSVC target environment) or
///   `C:\Windows\System32` (any other target environment).
/// * All other targets link `amdhip64` and search `/opt/rocm/lib/`.
///
/// # Errors
///
/// Returns the `VarError` produced when `CARGO_CFG_TARGET_ENV` or
/// `CARGO_MANIFEST_DIR` cannot be read.
fn main() -> Result<(), VarError> {
    // A build script is compiled for the host, so `cfg!(windows)` would
    // describe the machine running the build. Cargo exposes the *target*
    // configuration through `CARGO_CFG_*` variables, which is what the link
    // directives must follow when cross-compiling.
    let targets_windows = env::var_os("CARGO_CFG_WINDOWS").is_some();
    if targets_windows {
        println!("cargo:rustc-link-lib=dylib=amdhip64_6");
        let target_env = env::var("CARGO_CFG_TARGET_ENV")?;
        if target_env == "msvc" {
            let mut path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?);
            path.push("lib");
            println!("cargo:rustc-link-search=native={}", path.display());
        } else {
            println!("cargo:rustc-link-search=native=C:\\Windows\\System32");
        }
    } else {
        println!("cargo:rustc-link-lib=dylib=amdhip64");
        println!("cargo:rustc-link-search=native=/opt/rocm/lib/");
    }
    Ok(())
}
|
567
ext/hip_runtime-sys/lib/amdhip64_6.def
vendored
Normal file
567
ext/hip_runtime-sys/lib/amdhip64_6.def
vendored
Normal file
@ -0,0 +1,567 @@
|
||||
;
|
||||
; Definition file of amdhip64_6.dll
|
||||
; Automatic generated by gendef
|
||||
; written by Kai Tietz 2008
|
||||
;
|
||||
LIBRARY "amdhip64_6.dll"
|
||||
EXPORTS
|
||||
; enum hipError_t __cdecl hipExtModuleLaunchKernel(struct ihipModuleSymbol_t *__ptr64,unsigned int,unsigned int,unsigned int,unsigned int,unsigned int,unsigned int,unsigned __int64,struct ihipStream_t *__ptr64,void *__ptr64 *__ptr64,void *__ptr64 *__ptr64,struct ihipEvent_t *__ptr64,struct ihipEvent_t *__ptr64,unsigned int)
|
||||
?hipExtModuleLaunchKernel@@YA?AW4hipError_t@@PEAUihipModuleSymbol_t@@IIIIII_KPEAUihipStream_t@@PEAPEAX3PEAUihipEvent_t@@4I@Z
|
||||
hipExternalMemoryGetMappedMipmappedArray
|
||||
hipGraphAddExternalSemaphoresSignalNode
|
||||
hipGraphAddExternalSemaphoresWaitNode
|
||||
hipGraphExecExternalSemaphoresSignalNodeSetParams
|
||||
hipGraphExecExternalSemaphoresWaitNodeSetParams
|
||||
hipGraphExternalSemaphoresSignalNodeGetParams
|
||||
hipGraphExternalSemaphoresSignalNodeSetParams
|
||||
hipGraphExternalSemaphoresWaitNodeGetParams
|
||||
hipGraphExternalSemaphoresWaitNodeSetParams
|
||||
; enum hipError_t __cdecl hipHccModuleLaunchKernel(struct ihipModuleSymbol_t *__ptr64,unsigned int,unsigned int,unsigned int,unsigned int,unsigned int,unsigned int,unsigned __int64,struct ihipStream_t *__ptr64,void *__ptr64 *__ptr64,void *__ptr64 *__ptr64,struct ihipEvent_t *__ptr64,struct ihipEvent_t *__ptr64)
|
||||
?hipHccModuleLaunchKernel@@YA?AW4hipError_t@@PEAUihipModuleSymbol_t@@IIIIII_KPEAUihipStream_t@@PEAPEAX3PEAUihipEvent_t@@4@Z
|
||||
hipTexRefGetArray
|
||||
hipTexRefGetBorderColor
|
||||
AMD_CPU_AFFINITY DATA
|
||||
AMD_DIRECT_DISPATCH DATA
|
||||
AMD_GPU_FORCE_SINGLE_FP_DENORM DATA
|
||||
AMD_LOG_LEVEL DATA
|
||||
AMD_LOG_LEVEL_FILE DATA
|
||||
AMD_LOG_MASK DATA
|
||||
AMD_OCL_BUILD_OPTIONS DATA
|
||||
AMD_OCL_BUILD_OPTIONS_APPEND DATA
|
||||
AMD_OCL_LINK_OPTIONS DATA
|
||||
AMD_OCL_LINK_OPTIONS_APPEND DATA
|
||||
AMD_OCL_WAIT_COMMAND DATA
|
||||
AMD_OPT_FLUSH DATA
|
||||
AMD_SERIALIZE_COPY DATA
|
||||
AMD_SERIALIZE_KERNEL DATA
|
||||
AMD_THREAD_TRACE_ENABLE DATA
|
||||
CL_KHR_FP64 DATA
|
||||
CQ_THREAD_STACK_SIZE DATA
|
||||
CUDA_VISIBLE_DEVICES DATA
|
||||
DEBUG_CLR_GRAPH_PACKET_CAPTURE DATA
|
||||
DEBUG_CLR_LIMIT_BLIT_WG DATA
|
||||
DEBUG_HIP_GRAPH_DOT_PRINT DATA
|
||||
DISABLE_DEFERRED_ALLOC DATA
|
||||
GPU_ADD_HBCC_SIZE DATA
|
||||
GPU_ANALYZE_HANG DATA
|
||||
GPU_BLIT_ENGINE_TYPE DATA
|
||||
GPU_CP_DMA_COPY_SIZE DATA
|
||||
GPU_DEBUG_ENABLE DATA
|
||||
GPU_DEVICE_ORDINAL DATA
|
||||
GPU_DUMP_BLIT_KERNELS DATA
|
||||
GPU_DUMP_CODE_OBJECT DATA
|
||||
GPU_ENABLE_COOP_GROUPS DATA
|
||||
GPU_ENABLE_HW_P2P DATA
|
||||
GPU_ENABLE_LC DATA
|
||||
GPU_ENABLE_PAL DATA
|
||||
GPU_ENABLE_WAVE32_MODE DATA
|
||||
GPU_ENABLE_WGP_MODE DATA
|
||||
GPU_FLUSH_ON_EXECUTION DATA
|
||||
GPU_FORCE_BLIT_COPY_SIZE DATA
|
||||
GPU_FORCE_QUEUE_PROFILING DATA
|
||||
GPU_IMAGE_BUFFER_WAR DATA
|
||||
GPU_IMAGE_DMA DATA
|
||||
GPU_MAX_COMMAND_BUFFERS DATA
|
||||
GPU_MAX_HEAP_SIZE DATA
|
||||
GPU_MAX_HW_QUEUES DATA
|
||||
GPU_MAX_REMOTE_MEM_SIZE DATA
|
||||
GPU_MAX_SUBALLOC_SIZE DATA
|
||||
GPU_MAX_USWC_ALLOC_SIZE DATA
|
||||
GPU_MAX_WORKGROUP_SIZE DATA
|
||||
GPU_MIPMAP DATA
|
||||
GPU_NUM_COMPUTE_RINGS DATA
|
||||
GPU_NUM_MEM_DEPENDENCY DATA
|
||||
GPU_PINNED_MIN_XFER_SIZE DATA
|
||||
GPU_PINNED_XFER_SIZE DATA
|
||||
GPU_PRINT_CHILD_KERNEL DATA
|
||||
GPU_RESOURCE_CACHE_SIZE DATA
|
||||
GPU_SINGLE_ALLOC_PERCENT DATA
|
||||
GPU_STAGING_BUFFER_SIZE DATA
|
||||
GPU_STREAMOPS_CP_WAIT DATA
|
||||
GPU_USE_DEVICE_QUEUE DATA
|
||||
GPU_WAVES_PER_SIMD DATA
|
||||
GPU_XFER_BUFFER_SIZE DATA
|
||||
HIPRTC_COMPILE_OPTIONS_APPEND DATA
|
||||
HIPRTC_LINK_OPTIONS_APPEND DATA
|
||||
HIPRTC_USE_RUNTIME_UNBUNDLER DATA
|
||||
HIP_FORCE_DEV_KERNARG DATA
|
||||
HIP_HIDDEN_FREE_MEM DATA
|
||||
HIP_HOST_COHERENT DATA
|
||||
HIP_INITIAL_DM_SIZE DATA
|
||||
HIP_LAUNCH_BLOCKING DATA
|
||||
HIP_MEM_POOL_SUPPORT DATA
|
||||
HIP_MEM_POOL_USE_VM DATA
|
||||
HIP_USE_RUNTIME_UNBUNDLER DATA
|
||||
HIP_VISIBLE_DEVICES DATA
|
||||
HIP_VMEM_MANAGE_SUPPORT DATA
|
||||
HSA_KERNARG_POOL_SIZE DATA
|
||||
HSA_LOCAL_MEMORY_ENABLE DATA
|
||||
OCL_SET_SVM_SIZE DATA
|
||||
OCL_STUB_PROGRAMS DATA
|
||||
OPENCL_VERSION DATA
|
||||
PAL_ALWAYS_RESIDENT DATA
|
||||
PAL_DISABLE_SDMA DATA
|
||||
PAL_EMBED_KERNEL_MD DATA
|
||||
PAL_FORCE_ASIC_REVISION DATA
|
||||
PAL_HIP_IPC_FLAG DATA
|
||||
PAL_MALL_POLICY DATA
|
||||
PAL_PREPINNED_MEMORY_SIZE DATA
|
||||
PAL_RGP_DISP_COUNT DATA
|
||||
REMOTE_ALLOC DATA
|
||||
ROC_ACTIVE_WAIT_TIMEOUT DATA
|
||||
ROC_AQL_QUEUE_SIZE DATA
|
||||
ROC_CPU_WAIT_FOR_SIGNAL DATA
|
||||
ROC_ENABLE_LARGE_BAR DATA
|
||||
ROC_GLOBAL_CU_MASK DATA
|
||||
ROC_HMM_FLAGS DATA
|
||||
ROC_P2P_SDMA_SIZE DATA
|
||||
ROC_SIGNAL_POOL_SIZE DATA
|
||||
ROC_SKIP_KERNEL_ARG_COPY DATA
|
||||
ROC_SYSTEM_SCOPE_SIGNAL DATA
|
||||
ROC_USE_FGS_KERNARG DATA
|
||||
__gnu_f2h_ieee
|
||||
__gnu_h2f_ieee
|
||||
__hipPopCallConfiguration
|
||||
__hipPushCallConfiguration
|
||||
__hipRegisterFatBinary
|
||||
__hipRegisterFunction
|
||||
__hipRegisterManagedVar
|
||||
__hipRegisterSurface
|
||||
__hipRegisterTexture
|
||||
__hipRegisterVar
|
||||
__hipUnregisterFatBinary
|
||||
amd_dbgapi_get_build_id
|
||||
amd_dbgapi_get_build_name
|
||||
amd_dbgapi_get_git_hash
|
||||
hipApiName
|
||||
hipArray3DCreate
|
||||
hipArray3DGetDescriptor
|
||||
hipArrayCreate
|
||||
hipArrayDestroy
|
||||
hipArrayGetDescriptor
|
||||
hipArrayGetInfo
|
||||
hipBindTexture
|
||||
hipBindTexture2D
|
||||
hipBindTextureToArray
|
||||
hipBindTextureToMipmappedArray
|
||||
hipChooseDevice
|
||||
hipChooseDeviceR0000
|
||||
hipChooseDeviceR0600
|
||||
hipConfigureCall
|
||||
hipCreateChannelDesc
|
||||
hipCreateSurfaceObject
|
||||
hipCreateTextureObject
|
||||
hipCtxCreate
|
||||
hipCtxDestroy
|
||||
hipCtxDisablePeerAccess
|
||||
hipCtxEnablePeerAccess
|
||||
hipCtxGetApiVersion
|
||||
hipCtxGetCacheConfig
|
||||
hipCtxGetCurrent
|
||||
hipCtxGetDevice
|
||||
hipCtxGetFlags
|
||||
hipCtxGetSharedMemConfig
|
||||
hipCtxPopCurrent
|
||||
hipCtxPushCurrent
|
||||
hipCtxSetCacheConfig
|
||||
hipCtxSetCurrent
|
||||
hipCtxSetSharedMemConfig
|
||||
hipCtxSynchronize
|
||||
hipDestroyExternalMemory
|
||||
hipDestroyExternalSemaphore
|
||||
hipDestroySurfaceObject
|
||||
hipDestroyTextureObject
|
||||
hipDeviceCanAccessPeer
|
||||
hipDeviceComputeCapability
|
||||
hipDeviceDisablePeerAccess
|
||||
hipDeviceEnablePeerAccess
|
||||
hipDeviceGet
|
||||
hipDeviceGetAttribute
|
||||
hipDeviceGetByPCIBusId
|
||||
hipDeviceGetCacheConfig
|
||||
hipDeviceGetDefaultMemPool
|
||||
hipDeviceGetGraphMemAttribute
|
||||
hipDeviceGetLimit
|
||||
hipDeviceGetMemPool
|
||||
hipDeviceGetName
|
||||
hipDeviceGetP2PAttribute
|
||||
hipDeviceGetPCIBusId
|
||||
hipDeviceGetSharedMemConfig
|
||||
hipDeviceGetStreamPriorityRange
|
||||
hipDeviceGetUuid
|
||||
hipDeviceGraphMemTrim
|
||||
hipDevicePrimaryCtxGetState
|
||||
hipDevicePrimaryCtxRelease
|
||||
hipDevicePrimaryCtxReset
|
||||
hipDevicePrimaryCtxRetain
|
||||
hipDevicePrimaryCtxSetFlags
|
||||
hipDeviceReset
|
||||
hipDeviceSetCacheConfig
|
||||
hipDeviceSetGraphMemAttribute
|
||||
hipDeviceSetLimit
|
||||
hipDeviceSetMemPool
|
||||
hipDeviceSetSharedMemConfig
|
||||
hipDeviceSynchronize
|
||||
hipDeviceTotalMem
|
||||
hipDriverGetVersion
|
||||
hipDrvGetErrorName
|
||||
hipDrvGetErrorString
|
||||
hipDrvGraphAddMemcpyNode
|
||||
hipDrvGraphAddMemsetNode
|
||||
hipDrvMemcpy2DUnaligned
|
||||
hipDrvMemcpy3D
|
||||
hipDrvMemcpy3DAsync
|
||||
hipDrvPointerGetAttributes
|
||||
hipEventCreate
|
||||
hipEventCreateWithFlags
|
||||
hipEventDestroy
|
||||
hipEventElapsedTime
|
||||
hipEventQuery
|
||||
hipEventRecord
|
||||
hipEventRecord_spt
|
||||
hipEventSynchronize
|
||||
hipExtGetLastError
|
||||
hipExtGetLinkTypeAndHopCount
|
||||
hipExtLaunchKernel
|
||||
hipExtLaunchMultiKernelMultiDevice
|
||||
hipExtMallocWithFlags
|
||||
hipExtModuleLaunchKernel
|
||||
hipExtStreamCreateWithCUMask
|
||||
hipExtStreamGetCUMask
|
||||
hipExternalMemoryGetMappedBuffer
|
||||
hipFree
|
||||
hipFreeArray
|
||||
hipFreeAsync
|
||||
hipFreeHost
|
||||
hipFreeMipmappedArray
|
||||
hipFuncGetAttribute
|
||||
hipFuncGetAttributes
|
||||
hipFuncSetAttribute
|
||||
hipFuncSetCacheConfig
|
||||
hipFuncSetSharedMemConfig
|
||||
hipGLGetDevices
|
||||
hipGetChannelDesc
|
||||
hipGetCmdName
|
||||
hipGetDevice
|
||||
hipGetDeviceCount
|
||||
hipGetDeviceFlags
|
||||
hipGetDeviceProperties
|
||||
hipGetDevicePropertiesR0000
|
||||
hipGetDevicePropertiesR0600
|
||||
hipGetErrorName
|
||||
hipGetErrorString
|
||||
hipGetLastError
|
||||
hipGetMipmappedArrayLevel
|
||||
hipGetStreamDeviceId
|
||||
hipGetSymbolAddress
|
||||
hipGetSymbolSize
|
||||
hipGetTextureAlignmentOffset
|
||||
hipGetTextureObjectResourceDesc
|
||||
hipGetTextureObjectResourceViewDesc
|
||||
hipGetTextureObjectTextureDesc
|
||||
hipGetTextureReference
|
||||
hipGraphAddChildGraphNode
|
||||
hipGraphAddDependencies
|
||||
hipGraphAddEmptyNode
|
||||
hipGraphAddEventRecordNode
|
||||
hipGraphAddEventWaitNode
|
||||
hipGraphAddHostNode
|
||||
hipGraphAddKernelNode
|
||||
hipGraphAddMemAllocNode
|
||||
hipGraphAddMemFreeNode
|
||||
hipGraphAddMemcpyNode
|
||||
hipGraphAddMemcpyNode1D
|
||||
hipGraphAddMemcpyNodeFromSymbol
|
||||
hipGraphAddMemcpyNodeToSymbol
|
||||
hipGraphAddMemsetNode
|
||||
hipGraphChildGraphNodeGetGraph
|
||||
hipGraphClone
|
||||
hipGraphCreate
|
||||
hipGraphDebugDotPrint
|
||||
hipGraphDestroy
|
||||
hipGraphDestroyNode
|
||||
hipGraphEventRecordNodeGetEvent
|
||||
hipGraphEventRecordNodeSetEvent
|
||||
hipGraphEventWaitNodeGetEvent
|
||||
hipGraphEventWaitNodeSetEvent
|
||||
hipGraphExecChildGraphNodeSetParams
|
||||
hipGraphExecDestroy
|
||||
hipGraphExecEventRecordNodeSetEvent
|
||||
hipGraphExecEventWaitNodeSetEvent
|
||||
hipGraphExecHostNodeSetParams
|
||||
hipGraphExecKernelNodeSetParams
|
||||
hipGraphExecMemcpyNodeSetParams
|
||||
hipGraphExecMemcpyNodeSetParams1D
|
||||
hipGraphExecMemcpyNodeSetParamsFromSymbol
|
||||
hipGraphExecMemcpyNodeSetParamsToSymbol
|
||||
hipGraphExecMemsetNodeSetParams
|
||||
hipGraphExecUpdate
|
||||
hipGraphGetEdges
|
||||
hipGraphGetNodes
|
||||
hipGraphGetRootNodes
|
||||
hipGraphHostNodeGetParams
|
||||
hipGraphHostNodeSetParams
|
||||
hipGraphInstantiate
|
||||
hipGraphInstantiateWithFlags
|
||||
hipGraphKernelNodeCopyAttributes
|
||||
hipGraphKernelNodeGetAttribute
|
||||
hipGraphKernelNodeGetParams
|
||||
hipGraphKernelNodeSetAttribute
|
||||
hipGraphKernelNodeSetParams
|
||||
hipGraphLaunch
|
||||
hipGraphLaunch_spt
|
||||
hipGraphMemAllocNodeGetParams
|
||||
hipGraphMemFreeNodeGetParams
|
||||
hipGraphMemcpyNodeGetParams
|
||||
hipGraphMemcpyNodeSetParams
|
||||
hipGraphMemcpyNodeSetParams1D
|
||||
hipGraphMemcpyNodeSetParamsFromSymbol
|
||||
hipGraphMemcpyNodeSetParamsToSymbol
|
||||
hipGraphMemsetNodeGetParams
|
||||
hipGraphMemsetNodeSetParams
|
||||
hipGraphNodeFindInClone
|
||||
hipGraphNodeGetDependencies
|
||||
hipGraphNodeGetDependentNodes
|
||||
hipGraphNodeGetEnabled
|
||||
hipGraphNodeGetType
|
||||
hipGraphNodeSetEnabled
|
||||
hipGraphReleaseUserObject
|
||||
hipGraphRemoveDependencies
|
||||
hipGraphRetainUserObject
|
||||
hipGraphUpload
|
||||
hipGraphicsGLRegisterBuffer
|
||||
hipGraphicsGLRegisterImage
|
||||
hipGraphicsMapResources
|
||||
hipGraphicsResourceGetMappedPointer
|
||||
hipGraphicsSubResourceGetMappedArray
|
||||
hipGraphicsUnmapResources
|
||||
hipGraphicsUnregisterResource
|
||||
hipHccModuleLaunchKernel
|
||||
hipHostAlloc
|
||||
hipHostFree
|
||||
hipHostGetDevicePointer
|
||||
hipHostGetFlags
|
||||
hipHostMalloc
|
||||
hipHostRegister
|
||||
hipHostUnregister
|
||||
hipImportExternalMemory
|
||||
hipImportExternalSemaphore
|
||||
hipInit
|
||||
hipIpcCloseMemHandle
|
||||
hipIpcGetEventHandle
|
||||
hipIpcGetMemHandle
|
||||
hipIpcOpenEventHandle
|
||||
hipIpcOpenMemHandle
|
||||
hipKernelNameRef
|
||||
hipLaunchByPtr
|
||||
hipLaunchCooperativeKernel
|
||||
hipLaunchCooperativeKernelMultiDevice
|
||||
hipLaunchCooperativeKernel_spt
|
||||
hipLaunchHostFunc
|
||||
hipLaunchHostFunc_spt
|
||||
hipLaunchKernel
|
||||
hipLaunchKernel_spt
|
||||
hipMalloc
|
||||
hipMalloc3D
|
||||
hipMalloc3DArray
|
||||
hipMallocArray
|
||||
hipMallocAsync
|
||||
hipMallocFromPoolAsync
|
||||
hipMallocHost
|
||||
hipMallocManaged
|
||||
hipMallocMipmappedArray
|
||||
hipMallocPitch
|
||||
hipMemAddressFree
|
||||
hipMemAddressReserve
|
||||
hipMemAdvise
|
||||
hipMemAllocHost
|
||||
hipMemAllocPitch
|
||||
hipMemCreate
|
||||
hipMemExportToShareableHandle
|
||||
hipMemGetAccess
|
||||
hipMemGetAddressRange
|
||||
hipMemGetAllocationGranularity
|
||||
hipMemGetAllocationPropertiesFromHandle
|
||||
hipMemGetInfo
|
||||
hipMemImportFromShareableHandle
|
||||
hipMemMap
|
||||
hipMemMapArrayAsync
|
||||
hipMemPoolCreate
|
||||
hipMemPoolDestroy
|
||||
hipMemPoolExportPointer
|
||||
hipMemPoolExportToShareableHandle
|
||||
hipMemPoolGetAccess
|
||||
hipMemPoolGetAttribute
|
||||
hipMemPoolImportFromShareableHandle
|
||||
hipMemPoolImportPointer
|
||||
hipMemPoolSetAccess
|
||||
hipMemPoolSetAttribute
|
||||
hipMemPoolTrimTo
|
||||
hipMemPrefetchAsync
|
||||
hipMemPtrGetInfo
|
||||
hipMemRangeGetAttribute
|
||||
hipMemRangeGetAttributes
|
||||
hipMemRelease
|
||||
hipMemRetainAllocationHandle
|
||||
hipMemSetAccess
|
||||
hipMemUnmap
|
||||
hipMemcpy
|
||||
hipMemcpy2D
|
||||
hipMemcpy2DAsync
|
||||
hipMemcpy2DAsync_spt
|
||||
hipMemcpy2DFromArray
|
||||
hipMemcpy2DFromArrayAsync
|
||||
hipMemcpy2DFromArrayAsync_spt
|
||||
hipMemcpy2DFromArray_spt
|
||||
hipMemcpy2DToArray
|
||||
hipMemcpy2DToArrayAsync
|
||||
hipMemcpy2DToArrayAsync_spt
|
||||
hipMemcpy2DToArray_spt
|
||||
hipMemcpy2D_spt
|
||||
hipMemcpy3D
|
||||
hipMemcpy3DAsync
|
||||
hipMemcpy3DAsync_spt
|
||||
hipMemcpy3D_spt
|
||||
hipMemcpyAsync
|
||||
hipMemcpyAsync_spt
|
||||
hipMemcpyAtoH
|
||||
hipMemcpyDtoD
|
||||
hipMemcpyDtoDAsync
|
||||
hipMemcpyDtoH
|
||||
hipMemcpyDtoHAsync
|
||||
hipMemcpyFromArray
|
||||
hipMemcpyFromArray_spt
|
||||
hipMemcpyFromSymbol
|
||||
hipMemcpyFromSymbolAsync
|
||||
hipMemcpyFromSymbolAsync_spt
|
||||
hipMemcpyFromSymbol_spt
|
||||
hipMemcpyHtoA
|
||||
hipMemcpyHtoD
|
||||
hipMemcpyHtoDAsync
|
||||
hipMemcpyParam2D
|
||||
hipMemcpyParam2DAsync
|
||||
hipMemcpyPeer
|
||||
hipMemcpyPeerAsync
|
||||
hipMemcpyToArray
|
||||
hipMemcpyToSymbol
|
||||
hipMemcpyToSymbolAsync
|
||||
hipMemcpyToSymbolAsync_spt
|
||||
hipMemcpyToSymbol_spt
|
||||
hipMemcpyWithStream
|
||||
hipMemcpy_spt
|
||||
hipMemset
|
||||
hipMemset2D
|
||||
hipMemset2DAsync
|
||||
hipMemset2DAsync_spt
|
||||
hipMemset2D_spt
|
||||
hipMemset3D
|
||||
hipMemset3DAsync
|
||||
hipMemset3DAsync_spt
|
||||
hipMemset3D_spt
|
||||
hipMemsetAsync
|
||||
hipMemsetAsync_spt
|
||||
hipMemsetD16
|
||||
hipMemsetD16Async
|
||||
hipMemsetD32
|
||||
hipMemsetD32Async
|
||||
hipMemsetD8
|
||||
hipMemsetD8Async
|
||||
hipMemset_spt
|
||||
hipMipmappedArrayCreate
|
||||
hipMipmappedArrayDestroy
|
||||
hipMipmappedArrayGetLevel
|
||||
hipModuleGetFunction
|
||||
hipModuleGetGlobal
|
||||
hipModuleGetTexRef
|
||||
hipModuleLaunchCooperativeKernel
|
||||
hipModuleLaunchCooperativeKernelMultiDevice
|
||||
hipModuleLaunchKernel
|
||||
hipModuleLoad
|
||||
hipModuleLoadData
|
||||
hipModuleLoadDataEx
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
|
||||
hipModuleOccupancyMaxPotentialBlockSize
|
||||
hipModuleOccupancyMaxPotentialBlockSizeWithFlags
|
||||
hipModuleUnload
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
|
||||
hipOccupancyMaxPotentialBlockSize
|
||||
hipPeekAtLastError
|
||||
hipPointerGetAttribute
|
||||
hipPointerGetAttributes
|
||||
hipProfilerStart
|
||||
hipProfilerStop
|
||||
hipRegisterTracerCallback
|
||||
hipRuntimeGetVersion
|
||||
hipSetDevice
|
||||
hipSetDeviceFlags
|
||||
hipSetupArgument
|
||||
hipSignalExternalSemaphoresAsync
|
||||
hipStreamAddCallback
|
||||
hipStreamAddCallback_spt
|
||||
hipStreamAttachMemAsync
|
||||
hipStreamBeginCapture
|
||||
hipStreamBeginCapture_spt
|
||||
hipStreamCreate
|
||||
hipStreamCreateWithFlags
|
||||
hipStreamCreateWithPriority
|
||||
hipStreamDestroy
|
||||
hipStreamEndCapture
|
||||
hipStreamEndCapture_spt
|
||||
hipStreamGetCaptureInfo
|
||||
hipStreamGetCaptureInfo_spt
|
||||
hipStreamGetCaptureInfo_v2
|
||||
hipStreamGetCaptureInfo_v2_spt
|
||||
hipStreamGetDevice
|
||||
hipStreamGetFlags
|
||||
hipStreamGetFlags_spt
|
||||
hipStreamGetPriority
|
||||
hipStreamGetPriority_spt
|
||||
hipStreamIsCapturing
|
||||
hipStreamIsCapturing_spt
|
||||
hipStreamQuery
|
||||
hipStreamQuery_spt
|
||||
hipStreamSynchronize
|
||||
hipStreamSynchronize_spt
|
||||
hipStreamUpdateCaptureDependencies
|
||||
hipStreamWaitEvent
|
||||
hipStreamWaitEvent_spt
|
||||
hipStreamWaitValue32
|
||||
hipStreamWaitValue64
|
||||
hipStreamWriteValue32
|
||||
hipStreamWriteValue64
|
||||
hipTexObjectCreate
|
||||
hipTexObjectDestroy
|
||||
hipTexObjectGetResourceDesc
|
||||
hipTexObjectGetResourceViewDesc
|
||||
hipTexObjectGetTextureDesc
|
||||
hipTexRefGetAddress
|
||||
hipTexRefGetAddressMode
|
||||
hipTexRefGetFilterMode
|
||||
hipTexRefGetFlags
|
||||
hipTexRefGetFormat
|
||||
hipTexRefGetMaxAnisotropy
|
||||
hipTexRefGetMipMappedArray
|
||||
hipTexRefGetMipmapFilterMode
|
||||
hipTexRefGetMipmapLevelBias
|
||||
hipTexRefGetMipmapLevelClamp
|
||||
hipTexRefSetAddress
|
||||
hipTexRefSetAddress2D
|
||||
hipTexRefSetAddressMode
|
||||
hipTexRefSetArray
|
||||
hipTexRefSetBorderColor
|
||||
hipTexRefSetFilterMode
|
||||
hipTexRefSetFlags
|
||||
hipTexRefSetFormat
|
||||
hipTexRefSetMaxAnisotropy
|
||||
hipTexRefSetMipmapFilterMode
|
||||
hipTexRefSetMipmapLevelBias
|
||||
hipTexRefSetMipmapLevelClamp
|
||||
hipTexRefSetMipmappedArray
|
||||
hipThreadExchangeStreamCaptureMode
|
||||
hipUnbindTexture
|
||||
hipUserObjectCreate
|
||||
hipUserObjectRelease
|
||||
hipUserObjectRetain
|
||||
hipWaitExternalSemaphoresAsync
|
BIN
ext/hip_runtime-sys/lib/amdhip64_6.lib
vendored
Normal file
BIN
ext/hip_runtime-sys/lib/amdhip64_6.lib
vendored
Normal file
Binary file not shown.
12668
ext/hip_runtime-sys/src/lib.rs
vendored
Normal file
12668
ext/hip_runtime-sys/src/lib.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1
ext/llvm-project
vendored
Submodule
1
ext/llvm-project
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 6009708b4367171ccdbf4b5905cb6a803753fe18
|
1
ext/spirv-headers
vendored
1
ext/spirv-headers
vendored
@ -1 +0,0 @@
|
||||
Subproject commit 308bd07424350a6000f35a77b5f85cd4f3da319e
|
1
ext/spirv-tools
vendored
1
ext/spirv-tools
vendored
@ -1 +0,0 @@
|
||||
Subproject commit e128ab0d624ce7beb08eb9656bb260c597a46d0a
|
1
geekbench.svg
Normal file
1
geekbench.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 287 KiB |
@ -1,8 +0,0 @@
|
||||
[package]
|
||||
name = "level_zero-sys"
|
||||
version = "1.0.4"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
links = "ze_loader"
|
||||
|
||||
[lib]
|
@ -1,4 +0,0 @@
|
||||
# Blank out every `typedef uint32_t ze_*flags_t;` line of the installed header
# and write the result to a local ze_api.h.
sed 's/^typedef uint32_t ze_.*flags_t;$//g' /usr/local/include/level_zero/ze_api.h > ze_api.h
|
||||
# Rename each `ze_<name>_flag_t` identifier to `ze_<name>_flags_t`, in place.
sed -i -r 's/ze_(.*)_flag_t/ze_\1_flags_t/g' ze_api.h
|
||||
# Generate ze_api.rs from the patched header: newtype enums by default,
# bitfield enums for types matching `.*flags_t`, only `ze*` functions.
bindgen --size_t-is-usize --default-enum-style=newtype --bitfield-enum ".*flags_t" --whitelist-function "ze.*" ze_api.h -o ze_api.rs
|
||||
# Put `#[must_use]` on the line before `pub struct _ze_result_t` in the
# generated bindings.
sed -i 's/pub struct _ze_result_t/#[must_use]\npub struct _ze_result_t/g' ze_api.rs
|
Binary file not shown.
Binary file not shown.
@ -1,3 +0,0 @@
|
||||
#![allow(warnings)]
|
||||
pub mod ze_api;
|
||||
pub use ze_api::*;
|
File diff suppressed because it is too large
Load Diff
@ -1,14 +0,0 @@
|
||||
[package]
|
||||
name = "level_zero"
|
||||
version = "0.1.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
level_zero-sys = { path = "../level_zero-sys" }
|
||||
|
||||
[dependencies.ocl-core]
|
||||
version = "0.11"
|
||||
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]
|
@ -1 +0,0 @@
|
||||
More ergonomic bindings for oneAPI Level Zero
|
@ -1,4 +0,0 @@
|
||||
pub use level_zero_sys as sys;
|
||||
|
||||
pub mod ze;
|
||||
pub use ze::*;
|
@ -1,947 +0,0 @@
|
||||
use crate::sys;
|
||||
use std::{
|
||||
ffi::{c_void, CStr, CString},
|
||||
fmt::Debug,
|
||||
marker::PhantomData,
|
||||
mem, ptr,
|
||||
};
|
||||
|
||||
/// Evaluates a call inside `unsafe` and returns early from the enclosing
/// function with `Err(code)` when the resulting code is anything other than
/// `ZE_RESULT_SUCCESS`.
///
/// The `unused_unsafe` allowance lets the macro be used inside functions that
/// are already `unsafe`.
macro_rules! check {
    ($call:expr) => {
        #[allow(unused_unsafe)]
        {
            let status = unsafe { $call };
            if status != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
                return Err(status);
            }
        }
    };
}
|
||||
|
||||
/// Evaluates a call inside `unsafe` and panics when the resulting code is
/// anything other than `ZE_RESULT_SUCCESS`.
///
/// The panic message is the `Debug` rendering of the result code. Passing the
/// code itself as the panic payload (`panic!(err)`) prints an opaque
/// `Box<dyn Any>` message and is not accepted by the 2021 edition.
macro_rules! check_panic {
    ($expr:expr) => {
        let err = unsafe { $expr };
        if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
            panic!("{:?}", err);
        }
    };
}
|
||||
|
||||
/// Result alias whose error side is the raw `sys::ze_result_t` status code.
pub type Result<T> = std::result::Result<T, sys::ze_result_t>;
|
||||
|
||||
/// Newtype around a raw `sys::ze_result_t` status code.
///
/// NOTE(review): the `Result` alias in this file carries the bare
/// `sys::ze_result_t` as its error type rather than this wrapper; confirm
/// where `Error` is meant to be used.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Error(pub sys::ze_result_t);
|
||||
|
||||
pub fn init() -> Result<()> {
|
||||
match unsafe { sys::zeInit(sys::ze_init_flags_t::ZE_INIT_FLAG_GPU_ONLY) } {
|
||||
sys::ze_result_t::ZE_RESULT_SUCCESS => Ok(()),
|
||||
e => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Driver(sys::ze_driver_handle_t);
|
||||
|
||||
unsafe impl Send for Driver {}
|
||||
unsafe impl Sync for Driver {}
|
||||
|
||||
impl Driver {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_driver_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_driver_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn get() -> Result<Vec<Self>> {
|
||||
let mut len = 0;
|
||||
let mut temp = ptr::null_mut();
|
||||
check!(sys::zeDriverGet(&mut len, &mut temp));
|
||||
let mut result = (0..len)
|
||||
.map(|_| Driver(ptr::null_mut()))
|
||||
.collect::<Vec<_>>();
|
||||
check!(sys::zeDriverGet(&mut len, result.as_mut_ptr() as *mut _));
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn devices(&self) -> Result<Vec<Device>> {
|
||||
let mut len = 0;
|
||||
let mut temp = ptr::null_mut();
|
||||
check!(sys::zeDeviceGet(self.0, &mut len, &mut temp));
|
||||
let mut result = (0..len)
|
||||
.map(|_| Device(ptr::null_mut()))
|
||||
.collect::<Vec<_>>();
|
||||
check!(sys::zeDeviceGet(
|
||||
self.0,
|
||||
&mut len,
|
||||
result.as_mut_ptr() as *mut _
|
||||
));
|
||||
if (len as usize) < result.len() {
|
||||
result.truncate(len as usize);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Device(sys::ze_device_handle_t);
|
||||
|
||||
impl Device {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_device_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_device_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn get_properties(&self) -> Result<Box<sys::ze_device_properties_t>> {
|
||||
let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_properties_t>() });
|
||||
check! { sys::zeDeviceGetProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub fn get_image_properties(&self) -> Result<Box<sys::ze_device_image_properties_t>> {
|
||||
let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_image_properties_t>() });
|
||||
check! { sys::zeDeviceGetImageProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub fn get_memory_properties(&self) -> Result<Vec<sys::ze_device_memory_properties_t>> {
|
||||
let mut count = 0u32;
|
||||
check! { sys::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) };
|
||||
if count == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let mut props =
|
||||
vec![unsafe { mem::zeroed::<sys::ze_device_memory_properties_t>() }; count as usize];
|
||||
check! { sys::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub fn get_compute_properties(&self) -> Result<Box<sys::ze_device_compute_properties_t>> {
|
||||
let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_device_compute_properties_t>() });
|
||||
check! { sys::zeDeviceGetComputeProperties(self.0, props.as_mut()) };
|
||||
Ok(props)
|
||||
}
|
||||
|
||||
pub unsafe fn mem_alloc_device(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
size: usize,
|
||||
alignment: usize,
|
||||
) -> Result<*mut c_void> {
|
||||
let descr = sys::ze_device_mem_alloc_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_device_mem_alloc_flags_t(0),
|
||||
ordinal: 0,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
// TODO: check current context for the device
|
||||
check! {
|
||||
sys::zeMemAllocDevice(
|
||||
ctx.0,
|
||||
&descr,
|
||||
size,
|
||||
alignment,
|
||||
self.0,
|
||||
&mut result,
|
||||
)
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Context(sys::ze_context_handle_t);
|
||||
|
||||
impl Context {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_context_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_context_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn new(drv: &Driver) -> Result<Self> {
|
||||
let ctx_desc = sys::ze_context_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_CONTEXT_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_context_flags_t(0),
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeContextCreate(drv.0, &ctx_desc, &mut result));
|
||||
Ok(Context(result))
|
||||
}
|
||||
|
||||
pub unsafe fn mem_free(&mut self, ptr: *mut c_void) -> Result<()> {
|
||||
check! {
|
||||
sys::zeMemFree(
|
||||
self.0,
|
||||
ptr,
|
||||
)
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for Context {
|
||||
#[allow(unused_must_use)]
|
||||
fn drop(&mut self) {
|
||||
check_panic! { sys::zeContextDestroy(self.0) };
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct CommandQueue(sys::ze_command_queue_handle_t);
|
||||
|
||||
impl CommandQueue {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_command_queue_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_command_queue_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn new(ctx: &mut Context, d: &Device) -> Result<Self> {
|
||||
let que_desc = sys::ze_command_queue_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
|
||||
pNext: ptr::null(),
|
||||
ordinal: 0,
|
||||
index: 0,
|
||||
flags: sys::ze_command_queue_flags_t(0),
|
||||
mode: sys::ze_command_queue_mode_t::ZE_COMMAND_QUEUE_MODE_DEFAULT,
|
||||
priority: sys::ze_command_queue_priority_t::ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeCommandQueueCreate(
|
||||
ctx.0,
|
||||
d.0,
|
||||
&que_desc,
|
||||
&mut result
|
||||
));
|
||||
Ok(CommandQueue(result))
|
||||
}
|
||||
|
||||
pub fn execute<'a>(&'a self, cmd: CommandList) -> Result<FenceGuard<'a>> {
|
||||
check!(sys::zeCommandListClose(cmd.0));
|
||||
let result = FenceGuard::new(self, cmd.0)?;
|
||||
let mut raw_cmd = cmd.0;
|
||||
mem::forget(cmd);
|
||||
check!(sys::zeCommandQueueExecuteCommandLists(
|
||||
self.0,
|
||||
1,
|
||||
&mut raw_cmd,
|
||||
result.0
|
||||
));
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CommandQueue {
|
||||
#[allow(unused_must_use)]
|
||||
fn drop(&mut self) {
|
||||
check_panic! { sys::zeCommandQueueDestroy(self.0) };
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Module(sys::ze_module_handle_t);
|
||||
|
||||
impl Module {
|
||||
// HACK ALERT
|
||||
// We use OpenCL for now to do SPIR-V linking, because Level0
|
||||
// does not allow linking. Don't let presence of zeModuleDynamicLink fool
|
||||
// you, it's not currently possible to create non-compiled modules.
|
||||
// zeModuleCreate always compiles (builds and links).
|
||||
pub fn build_link_spirv<'a>(
|
||||
ctx: &mut Context,
|
||||
d: &Device,
|
||||
binaries: &[&'a [u8]],
|
||||
opts: Option<&CStr>,
|
||||
) -> (Result<Self>, Option<BuildLog>) {
|
||||
let ocl_program = match Self::build_link_spirv_impl(binaries, opts) {
|
||||
Err(_) => {
|
||||
return (
|
||||
Err(sys::ze_result_t::ZE_RESULT_ERROR_MODULE_LINK_FAILURE),
|
||||
None,
|
||||
)
|
||||
}
|
||||
Ok(prog) => prog,
|
||||
};
|
||||
match ocl_core::get_program_info(&ocl_program, ocl_core::ProgramInfo::Binaries) {
|
||||
Ok(ocl_core::ProgramInfoResult::Binaries(binaries)) => {
|
||||
let (module, build_log) = Self::build_native_logged(ctx, d, &binaries[0]);
|
||||
(module, Some(build_log))
|
||||
}
|
||||
_ => return (Err(sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN), None),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_link_spirv_impl<'a>(
|
||||
binaries: &[&'a [u8]],
|
||||
opts: Option<&CStr>,
|
||||
) -> ocl_core::Result<ocl_core::Program> {
|
||||
let platforms = ocl_core::get_platform_ids()?;
|
||||
let (platform, device) = platforms
|
||||
.iter()
|
||||
.find_map(|plat| {
|
||||
let devices =
|
||||
ocl_core::get_device_ids(plat, Some(ocl_core::DeviceType::GPU), None).ok()?;
|
||||
for dev in devices {
|
||||
let vendor =
|
||||
ocl_core::get_device_info(dev, ocl_core::DeviceInfo::VendorId).ok()?;
|
||||
if let ocl_core::DeviceInfoResult::VendorId(0x8086) = vendor {
|
||||
let dev_type =
|
||||
ocl_core::get_device_info(dev, ocl_core::DeviceInfo::Type).ok()?;
|
||||
if let ocl_core::DeviceInfoResult::Type(ocl_core::DeviceType::GPU) =
|
||||
dev_type
|
||||
{
|
||||
return Some((plat.clone(), dev));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.ok_or("")?;
|
||||
let ctx_props = ocl_core::ContextProperties::new().platform(platform);
|
||||
let ocl_ctx = ocl_core::create_context_from_type::<ocl_core::DeviceId>(
|
||||
Some(&ctx_props),
|
||||
ocl_core::DeviceType::GPU,
|
||||
None,
|
||||
None,
|
||||
)?;
|
||||
let mut programs = Vec::with_capacity(binaries.len());
|
||||
for binary in binaries {
|
||||
programs.push(ocl_core::create_program_with_il(&ocl_ctx, binary, None)?);
|
||||
}
|
||||
let options = match opts {
|
||||
Some(o) => o.to_owned(),
|
||||
None => CString::default(),
|
||||
};
|
||||
for program in programs.iter() {
|
||||
ocl_core::compile_program(
|
||||
program,
|
||||
Some(&[device]),
|
||||
&options,
|
||||
&[],
|
||||
&[],
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)?;
|
||||
}
|
||||
ocl_core::link_program::<ocl_core::DeviceId, _>(
|
||||
&ocl_ctx,
|
||||
Some(&[device]),
|
||||
&options,
|
||||
&programs.iter().collect::<Vec<_>>(),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn build_spirv(
|
||||
ctx: &mut Context,
|
||||
d: &Device,
|
||||
bin: &[u8],
|
||||
opts: Option<&CStr>,
|
||||
) -> Result<Self> {
|
||||
Module::new(ctx, true, d, bin, opts)
|
||||
}
|
||||
|
||||
pub fn build_spirv_logged(
|
||||
ctx: &mut Context,
|
||||
d: &Device,
|
||||
bin: &[u8],
|
||||
opts: Option<&CStr>,
|
||||
) -> (Result<Self>, BuildLog) {
|
||||
Module::new_logged(ctx, true, d, bin, opts)
|
||||
}
|
||||
|
||||
pub fn build_native_logged(ctx: &mut Context, d: &Device, bin: &[u8]) -> (Result<Self>, BuildLog) {
|
||||
Module::new_logged(ctx, false, d, bin, None)
|
||||
}
|
||||
|
||||
fn new(
|
||||
ctx: &mut Context,
|
||||
spirv: bool,
|
||||
d: &Device,
|
||||
bin: &[u8],
|
||||
opts: Option<&CStr>,
|
||||
) -> Result<Self> {
|
||||
let desc = sys::ze_module_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_MODULE_DESC,
|
||||
pNext: ptr::null(),
|
||||
format: if spirv {
|
||||
sys::ze_module_format_t::ZE_MODULE_FORMAT_IL_SPIRV
|
||||
} else {
|
||||
sys::ze_module_format_t::ZE_MODULE_FORMAT_NATIVE
|
||||
},
|
||||
inputSize: bin.len(),
|
||||
pInputModule: bin.as_ptr(),
|
||||
pBuildFlags: opts.map(|s| s.as_ptr() as *const _).unwrap_or(ptr::null()),
|
||||
pConstants: ptr::null(),
|
||||
};
|
||||
let mut result: sys::ze_module_handle_t = ptr::null_mut();
|
||||
let err = unsafe { sys::zeModuleCreate(ctx.0, d.0, &desc, &mut result, ptr::null_mut()) };
|
||||
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
Result::Err(err)
|
||||
} else {
|
||||
Ok(Module(result))
|
||||
}
|
||||
}
|
||||
|
||||
fn new_logged(
|
||||
ctx: &mut Context,
|
||||
spirv: bool,
|
||||
d: &Device,
|
||||
bin: &[u8],
|
||||
opts: Option<&CStr>,
|
||||
) -> (Result<Self>, BuildLog) {
|
||||
let desc = sys::ze_module_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_MODULE_DESC,
|
||||
pNext: ptr::null(),
|
||||
format: if spirv {
|
||||
sys::ze_module_format_t::ZE_MODULE_FORMAT_IL_SPIRV
|
||||
} else {
|
||||
sys::ze_module_format_t::ZE_MODULE_FORMAT_NATIVE
|
||||
},
|
||||
inputSize: bin.len(),
|
||||
pInputModule: bin.as_ptr(),
|
||||
pBuildFlags: opts.map(|s| s.as_ptr() as *const _).unwrap_or(ptr::null()),
|
||||
pConstants: ptr::null(),
|
||||
};
|
||||
let mut result: sys::ze_module_handle_t = ptr::null_mut();
|
||||
let mut log_handle = ptr::null_mut();
|
||||
let err = unsafe { sys::zeModuleCreate(ctx.0, d.0, &desc, &mut result, &mut log_handle) };
|
||||
let log = BuildLog(log_handle);
|
||||
if err != crate::sys::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
(Result::Err(err), log)
|
||||
} else {
|
||||
(Ok(Module(result)), log)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys the wrapped module handle with `zeModuleDestroy` when the wrapper
/// goes out of scope.
impl Drop for Module {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        // NOTE(review): `check_panic!` is defined outside this view; presumably
        // it panics on a non-success result - confirm.
        check_panic! { sys::zeModuleDestroy(self.0) };
    }
}
|
||||
|
||||
/// Owning wrapper around a module build log handle; the handle is destroyed
/// when the wrapper is dropped.
pub struct BuildLog(sys::ze_module_build_log_handle_t);
|
||||
|
||||
impl BuildLog {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_module_build_log_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_module_build_log_handle_t) -> Self {
|
||||
Self(x)
|
||||
}
|
||||
|
||||
pub fn get_cstring(&self) -> Result<CString> {
|
||||
let mut size = 0;
|
||||
check! { sys::zeModuleBuildLogGetString(self.0, &mut size, ptr::null_mut()) };
|
||||
let mut str_vec = vec![0u8; size];
|
||||
check! { sys::zeModuleBuildLogGetString(self.0, &mut size, str_vec.as_mut_ptr() as *mut i8) };
|
||||
str_vec.pop();
|
||||
Ok(CString::new(str_vec).map_err(|_| sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN)?)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for BuildLog {
|
||||
fn drop(&mut self) {
|
||||
check_panic!(sys::zeModuleBuildLogDestroy(self.0));
|
||||
}
|
||||
}
|
||||
|
||||
/// Marker trait bounding the element type of `DeviceBuffer`.
///
/// It is implemented for the fixed-width integer types and for both
/// floating point types.
pub trait SafeRepr {}

/// Implements `SafeRepr` for every listed type.
macro_rules! impl_safe_repr {
    ($($ty:ty),* $(,)?) => {
        $(impl SafeRepr for $ty {})*
    };
}

impl_safe_repr!(u8, i8, u16, i16, u32, i32, u64, i64, f32, f64);
|
||||
|
||||
/// Owning handle to a device memory allocation holding `len` elements of `T`.
/// The allocation is released with `zeMemFree` when the buffer is dropped.
pub struct DeviceBuffer<T: SafeRepr> {
    // Raw pointer returned by the allocation call.
    ptr: *mut c_void,
    // Context the allocation was made in; needed again to free it.
    ctx: sys::ze_context_handle_t,
    // Number of `T` elements (not bytes).
    len: usize,
    // Ties the buffer to its element type without storing a `T`.
    marker: PhantomData<T>,
}
|
||||
|
||||
impl<T: SafeRepr> DeviceBuffer<T> {
|
||||
pub unsafe fn as_ffi(&self) -> *mut c_void {
|
||||
self.ptr
|
||||
}
|
||||
pub unsafe fn from_ffi(ctx: sys::ze_context_handle_t, ptr: *mut c_void, len: usize) -> Self {
|
||||
let marker = PhantomData::<T>;
|
||||
Self {
|
||||
ptr,
|
||||
ctx,
|
||||
len,
|
||||
marker,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(ctx: &mut Context, dev: &Device, len: usize) -> Result<Self> {
|
||||
let desc = sys::_ze_device_mem_alloc_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_device_mem_alloc_flags_t(0),
|
||||
ordinal: 0,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeMemAllocDevice(
|
||||
ctx.0,
|
||||
&desc,
|
||||
len * mem::size_of::<T>(),
|
||||
mem::align_of::<T>(),
|
||||
dev.0,
|
||||
&mut result
|
||||
));
|
||||
Ok(unsafe { Self::from_ffi(ctx.0, result, len) })
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
/// Frees the allocation with `zeMemFree`, using the context stored at
/// construction time.
impl<T: SafeRepr> Drop for DeviceBuffer<T> {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        check_panic! { sys::zeMemFree(self.ctx, self.ptr) };
    }
}
|
||||
|
||||
/// Owning wrapper around a command list handle. The lifetime `'a` bounds the
/// kernels, events and buffers that are appended to the list.
pub struct CommandList<'a>(sys::ze_command_list_handle_t, PhantomData<&'a ()>);
|
||||
|
||||
impl<'a> CommandList<'a> {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_command_list_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_command_list_handle_t) -> Self {
|
||||
Self(x, PhantomData)
|
||||
}
|
||||
|
||||
pub fn new(ctx: &mut Context, dev: &Device) -> Result<Self> {
|
||||
let desc = sys::ze_command_list_desc_t {
|
||||
stype: sys::_ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
|
||||
commandQueueGroupOrdinal: 0,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_command_list_flags_t(0),
|
||||
};
|
||||
let mut result: sys::ze_command_list_handle_t = ptr::null_mut();
|
||||
check!(sys::zeCommandListCreate(ctx.0, dev.0, &desc, &mut result));
|
||||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
|
||||
pub fn append_memory_copy<
|
||||
T: 'a,
|
||||
Dst: Into<BufferPtrMut<'a, T>>,
|
||||
Src: Into<BufferPtr<'a, T>>,
|
||||
>(
|
||||
&mut self,
|
||||
dst: Dst,
|
||||
src: Src,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let dst = dst.into();
|
||||
let src = src.into();
|
||||
let elements = std::cmp::min(dst.len(), src.len());
|
||||
let length = elements * mem::size_of::<T>();
|
||||
unsafe { self.append_memory_copy_unsafe(dst.get(), src.get(), length, signal, wait) }
|
||||
}
|
||||
|
||||
pub unsafe fn append_memory_copy_unsafe(
|
||||
&mut self,
|
||||
dst: *mut c_void,
|
||||
src: *const c_void,
|
||||
length: usize,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = Event::raw_slice(wait);
|
||||
check!(sys::zeCommandListAppendMemoryCopy(
|
||||
self.0,
|
||||
dst,
|
||||
src,
|
||||
length,
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn append_memory_fill<T>(
|
||||
&mut self,
|
||||
dst: BufferPtrMut<'a, T>,
|
||||
pattern: u8,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let raw_pattern = &pattern as *const u8 as *const _;
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = unsafe { Event::raw_slice(wait) };
|
||||
let byte_len = dst.len() * mem::size_of::<T>();
|
||||
check!(sys::zeCommandListAppendMemoryFill(
|
||||
self.0,
|
||||
dst.get(),
|
||||
raw_pattern,
|
||||
mem::size_of::<u8>(),
|
||||
byte_len,
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub unsafe fn append_memory_fill_unsafe<T: Copy + Sized>(
|
||||
&mut self,
|
||||
dst: *mut c_void,
|
||||
pattern: &T,
|
||||
byte_size: usize,
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = Event::raw_slice(wait);
|
||||
check!(sys::zeCommandListAppendMemoryFill(
|
||||
self.0,
|
||||
dst,
|
||||
pattern as *const T as *const _,
|
||||
mem::size_of::<T>(),
|
||||
byte_size,
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn append_launch_kernel(
|
||||
&mut self,
|
||||
kernel: &'a Kernel,
|
||||
group_count: &[u32; 3],
|
||||
signal: Option<&mut Event<'a>>,
|
||||
wait: &mut [Event<'a>],
|
||||
) -> Result<()> {
|
||||
let gr_count = sys::ze_group_count_t {
|
||||
groupCountX: group_count[0],
|
||||
groupCountY: group_count[1],
|
||||
groupCountZ: group_count[2],
|
||||
};
|
||||
let signal_event = signal.map(|e| e.0).unwrap_or(ptr::null_mut());
|
||||
let (wait_len, wait_ptr) = unsafe { Event::raw_slice(wait) };
|
||||
check!(sys::zeCommandListAppendLaunchKernel(
|
||||
self.0,
|
||||
kernel.0,
|
||||
&gr_count,
|
||||
signal_event,
|
||||
wait_len,
|
||||
wait_ptr,
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys the wrapped command list handle with `zeCommandListDestroy`.
impl<'a> Drop for CommandList<'a> {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        check_panic! { sys::zeCommandListDestroy(self.0) };
    }
}
|
||||
|
||||
/// Guard that owns a fence together with a command list handle.
///
/// On drop it waits on the fence, then destroys the fence and the command
/// list. The lifetime `'a` ties the guard to the command queue the fence was
/// created on.
pub struct FenceGuard<'a>(
    sys::ze_fence_handle_t,
    sys::ze_command_list_handle_t,
    PhantomData<&'a ()>,
);
|
||||
|
||||
impl<'a> FenceGuard<'a> {
|
||||
fn new(q: &'a CommandQueue, cmd_list: sys::ze_command_list_handle_t) -> Result<Self> {
|
||||
let desc = sys::_ze_fence_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_FENCE_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_fence_flags_t(0),
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeFenceCreate(q.0, &desc, &mut result));
|
||||
Ok(FenceGuard(result, cmd_list, PhantomData))
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits for the fence and then releases both owned handles.
impl<'a> Drop for FenceGuard<'a> {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        // Wait with the largest possible timeout value before tearing down.
        check_panic! { sys::zeFenceHostSynchronize(self.0, u64::max_value()) };
        check_panic! { sys::zeFenceDestroy(self.0) };
        check_panic! { sys::zeCommandListDestroy(self.1) };
    }
}
|
||||
|
||||
/// Read-only, type-tagged view of a contiguous run of `T` elements, created
/// from either a slice or a `DeviceBuffer`.
#[derive(Copy, Clone)]
pub struct BufferPtr<'a, T> {
    // Start of the viewed memory.
    ptr: *const c_void,
    // Carries the borrow lifetime and element type.
    marker: PhantomData<&'a T>,
    // Number of `T` elements (not bytes).
    elems: usize,
}
|
||||
|
||||
impl<'a, T> BufferPtr<'a, T> {
|
||||
pub unsafe fn get(self) -> *const c_void {
|
||||
return self.ptr;
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.elems
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> From<&'a [T]> for BufferPtr<'a, T> {
|
||||
fn from(s: &'a [T]) -> Self {
|
||||
BufferPtr {
|
||||
ptr: s.as_ptr() as *const _,
|
||||
marker: PhantomData,
|
||||
elems: s.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: SafeRepr> From<&'a DeviceBuffer<T>> for BufferPtr<'a, T> {
|
||||
fn from(b: &'a DeviceBuffer<T>) -> Self {
|
||||
BufferPtr {
|
||||
ptr: b.ptr as *const _,
|
||||
marker: PhantomData,
|
||||
elems: b.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Writable, type-tagged view of a contiguous run of `T` elements, created
/// from either a mutable slice or a mutable `DeviceBuffer` borrow.
#[derive(Copy, Clone)]
pub struct BufferPtrMut<'a, T> {
    // Start of the viewed memory.
    ptr: *mut c_void,
    // Carries the borrow lifetime and element type.
    marker: PhantomData<&'a mut T>,
    // Number of `T` elements (not bytes).
    elems: usize,
}
|
||||
|
||||
impl<'a, T> BufferPtrMut<'a, T> {
|
||||
pub unsafe fn get(self) -> *mut c_void {
|
||||
return self.ptr;
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.elems
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> From<&'a mut [T]> for BufferPtrMut<'a, T> {
|
||||
fn from(s: &'a mut [T]) -> Self {
|
||||
BufferPtrMut {
|
||||
ptr: s.as_mut_ptr() as *mut _,
|
||||
marker: PhantomData,
|
||||
elems: s.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: SafeRepr> From<&'a mut DeviceBuffer<T>> for BufferPtrMut<'a, T> {
|
||||
fn from(b: &'a mut DeviceBuffer<T>) -> Self {
|
||||
BufferPtrMut {
|
||||
ptr: b.ptr as *mut _,
|
||||
marker: PhantomData,
|
||||
elems: b.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: SafeRepr> From<BufferPtrMut<'a, T>> for BufferPtr<'a, T> {
|
||||
fn from(b: BufferPtrMut<'a, T>) -> Self {
|
||||
BufferPtr {
|
||||
ptr: b.ptr,
|
||||
marker: PhantomData,
|
||||
elems: b.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Owning wrapper around an event pool handle. The lifetime `'a` bounds the
/// devices passed at creation time.
pub struct EventPool<'a>(sys::ze_event_pool_handle_t, PhantomData<&'a ()>);
|
||||
|
||||
impl<'a> EventPool<'a> {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_event_pool_handle_t {
|
||||
self.0
|
||||
}
|
||||
pub unsafe fn from_ffi(x: sys::ze_event_pool_handle_t) -> Self {
|
||||
Self(x, PhantomData)
|
||||
}
|
||||
pub fn new(ctx: &mut Context, count: u32, dev: Option<&[&'a Device]>) -> Result<Self> {
|
||||
let desc = sys::ze_event_pool_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_event_pool_flags_t(0),
|
||||
count: count,
|
||||
};
|
||||
let mut dev = dev.map(|d| d.iter().map(|d| d.0).collect::<Vec<_>>());
|
||||
let dev_len = dev.as_ref().map_or(0, |d| d.len() as u32);
|
||||
let dev_ptr = dev.as_mut().map_or(ptr::null_mut(), |d| d.as_mut_ptr());
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeEventPoolCreate(
|
||||
ctx.0,
|
||||
&desc,
|
||||
dev_len,
|
||||
dev_ptr,
|
||||
&mut result
|
||||
));
|
||||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys the wrapped event pool handle with `zeEventPoolDestroy`.
impl<'a> Drop for EventPool<'a> {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        check_panic! { sys::zeEventPoolDestroy(self.0) };
    }
}
|
||||
|
||||
/// Owning wrapper around an event handle; `'a` ties it to its event pool.
///
/// `#[repr(transparent)]` guarantees the same layout as the raw handle.
/// `Event::raw_slice` depends on this when it reinterprets a `[Event]` as an
/// array of raw handles.
#[repr(transparent)]
pub struct Event<'a>(sys::ze_event_handle_t, PhantomData<&'a ()>);
|
||||
|
||||
impl<'a> Event<'a> {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_event_handle_t {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub unsafe fn from_ffi(x: sys::ze_event_handle_t) -> Self {
|
||||
Self(x, PhantomData)
|
||||
}
|
||||
|
||||
pub fn new(pool: &'a EventPool, index: u32) -> Result<Self> {
|
||||
let desc = sys::ze_event_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_EVENT_DESC,
|
||||
pNext: ptr::null(),
|
||||
index: index,
|
||||
signal: sys::ze_event_scope_flags_t(0),
|
||||
wait: sys::ze_event_scope_flags_t(0),
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeEventCreate(pool.0, &desc, &mut result));
|
||||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
|
||||
unsafe fn raw_slice(e: &mut [Event]) -> (u32, *mut sys::ze_event_handle_t) {
|
||||
let ptr = if e.len() == 0 {
|
||||
ptr::null_mut()
|
||||
} else {
|
||||
e.as_mut_ptr()
|
||||
};
|
||||
(e.len() as u32, ptr as *mut sys::ze_event_handle_t)
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys the wrapped event handle with `zeEventDestroy`.
impl<'a> Drop for Event<'a> {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        check_panic! { sys::zeEventDestroy(self.0) };
    }
}
|
||||
|
||||
/// Owning wrapper around a kernel handle; `'a` ties it to the module it was
/// created from.
pub struct Kernel<'a>(sys::ze_kernel_handle_t, PhantomData<&'a ()>);
|
||||
|
||||
impl<'a> Kernel<'a> {
|
||||
pub unsafe fn as_ffi(&self) -> sys::ze_kernel_handle_t {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub unsafe fn from_ffi(x: sys::ze_kernel_handle_t) -> Self {
|
||||
Self(x, PhantomData)
|
||||
}
|
||||
|
||||
pub fn new_resident(module: &'a Module, name: &CStr) -> Result<Self> {
|
||||
let desc = sys::ze_kernel_desc_t {
|
||||
stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_KERNEL_DESC,
|
||||
pNext: ptr::null(),
|
||||
flags: sys::ze_kernel_flags_t::ZE_KERNEL_FLAG_FORCE_RESIDENCY,
|
||||
pKernelName: name.as_ptr() as *const _,
|
||||
};
|
||||
let mut result = ptr::null_mut();
|
||||
check!(sys::zeKernelCreate(module.0, &desc, &mut result));
|
||||
Ok(Self(result, PhantomData))
|
||||
}
|
||||
|
||||
pub fn set_indirect_access(
|
||||
&mut self,
|
||||
flags: sys::ze_kernel_indirect_access_flags_t,
|
||||
) -> Result<()> {
|
||||
check!(sys::zeKernelSetIndirectAccess(self.0, flags));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_arg_buffer<T: 'a, Buff: Into<BufferPtr<'a, T>>>(
|
||||
&self,
|
||||
index: u32,
|
||||
buff: Buff,
|
||||
) -> Result<()> {
|
||||
let ptr = unsafe { buff.into().get() };
|
||||
check!(sys::zeKernelSetArgumentValue(
|
||||
self.0,
|
||||
index,
|
||||
mem::size_of::<*const ()>(),
|
||||
&ptr as *const _ as *const _,
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_arg_scalar<T: Copy>(&self, index: u32, value: &T) -> Result<()> {
|
||||
check!(sys::zeKernelSetArgumentValue(
|
||||
self.0,
|
||||
index,
|
||||
mem::size_of::<T>(),
|
||||
value as *const T as *const _,
|
||||
));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub unsafe fn set_arg_raw(&self, index: u32, size: usize, value: *const c_void) -> Result<()> {
|
||||
check!(sys::zeKernelSetArgumentValue(self.0, index, size, value));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_group_size(&self, x: u32, y: u32, z: u32) -> Result<()> {
|
||||
check!(sys::zeKernelSetGroupSize(self.0, x, y, z));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_properties(&self) -> Result<Box<sys::ze_kernel_properties_t>> {
|
||||
let mut props = Box::new(unsafe { mem::zeroed::<sys::ze_kernel_properties_t>() });
|
||||
check!(sys::zeKernelGetProperties(self.0, props.as_mut() as *mut _));
|
||||
Ok(props)
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys the wrapped kernel handle with `zeKernelDestroy`.
impl<'a> Drop for Kernel<'a> {
    #[allow(unused_must_use)]
    fn drop(&mut self) {
        check_panic! { sys::zeKernelDestroy(self.0) };
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `Event::raw_slice` reinterprets `[Event]` as an array of raw handles,
    /// which is only valid when both size and alignment match.
    #[test]
    fn event_has_correct_layout() {
        assert_eq!(
            mem::size_of::<Event>(),
            mem::size_of::<sys::ze_event_handle_t>()
        );
        assert_eq!(
            mem::align_of::<Event>(),
            mem::align_of::<sys::ze_event_handle_t>()
        );
    }
}
|
17
llvm_zluda/Cargo.toml
Normal file
17
llvm_zluda/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "llvm_zluda"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
bitflags = "2.4"
|
||||
|
||||
[dependencies.llvm-sys]
|
||||
version = "170"
|
||||
features = [ "disable-alltargets-init", "no-llvm-linking" ]
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1"
|
||||
cc = "1.0.69"
|
131
llvm_zluda/build.rs
Normal file
131
llvm_zluda/build.rs
Normal file
@ -0,0 +1,131 @@
|
||||
use cmake::Config;
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
/// CMake targets of the LLVM libraries that get built and linked.
///
/// The same names, with the `LLVM` prefix removed and lowercased, are passed
/// to `llvm-config` as component names.
const COMPONENTS: &[&str] = &[
    "LLVMCore",
    "LLVMBitWriter",
    // for module verify
    #[cfg(debug_assertions)]
    "LLVMAnalysis",
    #[cfg(debug_assertions)]
    "LLVMBitReader",
];
|
||||
|
||||
fn main() {
|
||||
let mut cmake = Config::new(r"../ext/llvm-project/llvm");
|
||||
try_use_ninja(&mut cmake);
|
||||
cmake
|
||||
// It's not like we can do anything about the warnings
|
||||
.define("LLVM_ENABLE_WARNINGS", "OFF")
|
||||
// For some reason Rust always links to release MSVCRT
|
||||
.define("CMAKE_MSVC_RUNTIME_LIBRARY", "MultiThreadedDLL")
|
||||
.define("LLVM_ENABLE_TERMINFO", "OFF")
|
||||
.define("LLVM_ENABLE_LIBXML2", "OFF")
|
||||
.define("LLVM_ENABLE_LIBEDIT", "OFF")
|
||||
.define("LLVM_ENABLE_LIBPFM", "OFF")
|
||||
.define("LLVM_ENABLE_ZLIB", "OFF")
|
||||
.define("LLVM_ENABLE_ZSTD", "OFF")
|
||||
.define("LLVM_INCLUDE_BENCHMARKS", "OFF")
|
||||
.define("LLVM_INCLUDE_EXAMPLES", "OFF")
|
||||
.define("LLVM_INCLUDE_TESTS", "OFF")
|
||||
.define("LLVM_BUILD_TOOLS", "OFF")
|
||||
.define("LLVM_TARGETS_TO_BUILD", "")
|
||||
.define("LLVM_ENABLE_PROJECTS", "");
|
||||
cmake.build_target("llvm-config");
|
||||
let llvm_dir = cmake.build();
|
||||
for c in COMPONENTS {
|
||||
cmake.build_target(c);
|
||||
cmake.build();
|
||||
}
|
||||
let cmake_profile = cmake.get_profile();
|
||||
let (cxxflags, ldflags, libdir, lib_names, system_libs) =
|
||||
llvm_config(&llvm_dir, &["build", "bin", "llvm-config"])
|
||||
.or_else(|_| llvm_config(&llvm_dir, &["build", cmake_profile, "bin", "llvm-config"]))
|
||||
.unwrap();
|
||||
println!("cargo:rustc-link-arg={ldflags}");
|
||||
println!("cargo:rustc-link-search=native={libdir}");
|
||||
for lib in system_libs.split_ascii_whitespace() {
|
||||
println!("cargo:rustc-link-arg={lib}");
|
||||
}
|
||||
link_llvm_components(lib_names);
|
||||
compile_cxx_lib(cxxflags);
|
||||
}
|
||||
|
||||
fn try_use_ninja(cmake: &mut Config) {
|
||||
let mut cmd = Command::new("ninja");
|
||||
cmd.arg("--version");
|
||||
if let Ok(status) = cmd.status() {
|
||||
if status.success() {
|
||||
cmake.generator("Ninja");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn llvm_config(
|
||||
llvm_build_dir: &PathBuf,
|
||||
path_to_llvm_config: &[&str],
|
||||
) -> io::Result<(String, String, String, String, String)> {
|
||||
let mut llvm_build_path = llvm_build_dir.clone();
|
||||
llvm_build_path.extend(path_to_llvm_config);
|
||||
let mut cmd = Command::new(llvm_build_path);
|
||||
cmd.args([
|
||||
"--link-static",
|
||||
"--cxxflags",
|
||||
"--ldflags",
|
||||
"--libdir",
|
||||
"--libnames",
|
||||
"--system-libs",
|
||||
]);
|
||||
for c in COMPONENTS {
|
||||
cmd.arg(c[4..].to_lowercase());
|
||||
}
|
||||
let output = cmd.output()?;
|
||||
if !output.status.success() {
|
||||
return Err(io::Error::from(io::ErrorKind::Other));
|
||||
}
|
||||
let output = unsafe { String::from_utf8_unchecked(output.stdout) };
|
||||
let mut lines = output.lines();
|
||||
let cxxflags = lines.next().unwrap();
|
||||
let ldflags = lines.next().unwrap();
|
||||
let libdir = lines.next().unwrap();
|
||||
let lib_names = lines.next().unwrap();
|
||||
let system_libs = lines.next().unwrap();
|
||||
Ok((
|
||||
cxxflags.to_string(),
|
||||
ldflags.to_string(),
|
||||
libdir.to_string(),
|
||||
lib_names.to_string(),
|
||||
system_libs.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn compile_cxx_lib(cxxflags: String) {
|
||||
let mut cc = cc::Build::new();
|
||||
for flag in cxxflags.split_whitespace() {
|
||||
cc.flag(flag);
|
||||
}
|
||||
cc.cpp(true).file("src/lib.cpp").compile("llvm_zluda_cpp");
|
||||
println!("cargo:rerun-if-changed=src/lib.cpp");
|
||||
println!("cargo:rerun-if-changed=src/lib.rs");
|
||||
}
|
||||
|
||||
/// Emits one `cargo:rustc-link-lib` directive per static library file name
/// in the whitespace-separated list `components`.
///
/// Accepted forms are `libNAME.a` (Unix: Linux/Mac) and `NAME.lib`
/// (Windows).
///
/// # Panics
///
/// Panics when an entry matches neither form.
fn link_llvm_components(components: String) {
    for file_name in components.split_whitespace() {
        // Unix form, e.g. libLLVMfoo.a
        let unix_name = file_name
            .strip_prefix("lib")
            .and_then(|rest| rest.strip_suffix(".a"));
        // Windows form, e.g. LLVMfoo.lib
        let lib_name = match unix_name.or_else(|| file_name.strip_suffix(".lib")) {
            Some(name) => name,
            None => panic!("'{}' does not look like a static library name", file_name),
        };
        println!("cargo:rustc-link-lib={lib_name}");
    }
}
|
199
llvm_zluda/src/lib.cpp
Normal file
199
llvm_zluda/src/lib.cpp
Normal file
@ -0,0 +1,199 @@
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Type.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/**
 * Atomic read-modify-write operations accepted by LLVMZludaBuildAtomicRMW.
 *
 * The numeric values are part of the FFI contract: they must match the
 * `LLVMZludaAtomicRMWBinOp` enum declared on the Rust side.
 */
enum LLVMZludaAtomicRMWBinOp
{
    /** Set the new value and return the old one */
    LLVMZludaAtomicRMWBinOpXchg = 0,
    /** Add a value and return the old one */
    LLVMZludaAtomicRMWBinOpAdd = 1,
    /** Subtract a value and return the old one */
    LLVMZludaAtomicRMWBinOpSub = 2,
    /** And a value and return the old one */
    LLVMZludaAtomicRMWBinOpAnd = 3,
    /** Not-And a value and return the old one */
    LLVMZludaAtomicRMWBinOpNand = 4,
    /** Or a value and return the old one */
    LLVMZludaAtomicRMWBinOpOr = 5,
    /** Xor a value and return the old one */
    LLVMZludaAtomicRMWBinOpXor = 6,
    /** Sets the value if it's greater than the original using a signed
        comparison and return the old one */
    LLVMZludaAtomicRMWBinOpMax = 7,
    /** Sets the value if it's smaller than the original using a signed
        comparison and return the old one */
    LLVMZludaAtomicRMWBinOpMin = 8,
    /** Sets the value if it's greater than the original using an unsigned
        comparison and return the old one */
    LLVMZludaAtomicRMWBinOpUMax = 9,
    /** Sets the value if it's smaller than the original using an unsigned
        comparison and return the old one */
    LLVMZludaAtomicRMWBinOpUMin = 10,
    /** Add a floating point value and return the old one */
    LLVMZludaAtomicRMWBinOpFAdd = 11,
    /** Subtract a floating point value and return the old one */
    LLVMZludaAtomicRMWBinOpFSub = 12,
    /** Sets the value if it's greater than the original using a floating
        point comparison and return the old one */
    LLVMZludaAtomicRMWBinOpFMax = 13,
    /** Sets the value if it's smaller than the original using a floating
        point comparison and return the old one */
    LLVMZludaAtomicRMWBinOpFMin = 14,
    /** Increments the value, wrapping back to zero when incremented above
        input value */
    LLVMZludaAtomicRMWBinOpUIncWrap = 15,
    /** Decrements the value, wrapping back to the input value when
        decremented below zero */
    LLVMZludaAtomicRMWBinOpUDecWrap = 16,
};
|
||||
|
||||
/// Translates the C-level RMW operation code into the matching
/// llvm::AtomicRMWInst::BinOp. Values outside the enum are unreachable.
static llvm::AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMZludaAtomicRMWBinOp BinOp)
{
    using RMW = llvm::AtomicRMWInst;
    switch (BinOp)
    {
    case LLVMZludaAtomicRMWBinOpXchg:     return RMW::Xchg;
    case LLVMZludaAtomicRMWBinOpAdd:      return RMW::Add;
    case LLVMZludaAtomicRMWBinOpSub:      return RMW::Sub;
    case LLVMZludaAtomicRMWBinOpAnd:      return RMW::And;
    case LLVMZludaAtomicRMWBinOpNand:     return RMW::Nand;
    case LLVMZludaAtomicRMWBinOpOr:       return RMW::Or;
    case LLVMZludaAtomicRMWBinOpXor:      return RMW::Xor;
    case LLVMZludaAtomicRMWBinOpMax:      return RMW::Max;
    case LLVMZludaAtomicRMWBinOpMin:      return RMW::Min;
    case LLVMZludaAtomicRMWBinOpUMax:     return RMW::UMax;
    case LLVMZludaAtomicRMWBinOpUMin:     return RMW::UMin;
    case LLVMZludaAtomicRMWBinOpFAdd:     return RMW::FAdd;
    case LLVMZludaAtomicRMWBinOpFSub:     return RMW::FSub;
    case LLVMZludaAtomicRMWBinOpFMax:     return RMW::FMax;
    case LLVMZludaAtomicRMWBinOpFMin:     return RMW::FMin;
    case LLVMZludaAtomicRMWBinOpUIncWrap: return RMW::UIncWrap;
    case LLVMZludaAtomicRMWBinOpUDecWrap: return RMW::UDecWrap;
    }

    llvm_unreachable("Invalid LLVMZludaAtomicRMWBinOp value!");
}
|
||||
|
||||
/// Translates an LLVM-C atomic ordering into the C++ llvm::AtomicOrdering.
/// Values outside the enum are unreachable.
static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering)
{
    using Order = llvm::AtomicOrdering;
    switch (Ordering)
    {
    case LLVMAtomicOrderingNotAtomic:              return Order::NotAtomic;
    case LLVMAtomicOrderingUnordered:              return Order::Unordered;
    case LLVMAtomicOrderingMonotonic:              return Order::Monotonic;
    case LLVMAtomicOrderingAcquire:                return Order::Acquire;
    case LLVMAtomicOrderingRelease:                return Order::Release;
    case LLVMAtomicOrderingAcquireRelease:         return Order::AcquireRelease;
    case LLVMAtomicOrderingSequentiallyConsistent: return Order::SequentiallyConsistent;
    }

    llvm_unreachable("Invalid LLVMAtomicOrdering value!");
}
|
||||
|
||||
// Bit set of fast-math flags; combine the enumerators below with `|`.
typedef unsigned LLVMFastMathFlags;

// One bit per fast-math flag. The bit positions are the same as those of the
// LLVMZludaFastMath* constants declared on the Rust side.
enum
{
    LLVMFastMathAllowReassoc = (1 << 0),
    LLVMFastMathNoNaNs = (1 << 1),
    LLVMFastMathNoInfs = (1 << 2),
    LLVMFastMathNoSignedZeros = (1 << 3),
    LLVMFastMathAllowReciprocal = (1 << 4),
    LLVMFastMathAllowContract = (1 << 5),
    LLVMFastMathApproxFunc = (1 << 6),
    // No flag set.
    LLVMFastMathNone = 0,
    // Every flag set.
    LLVMFastMathAll = LLVMFastMathAllowReassoc | LLVMFastMathNoNaNs |
                      LLVMFastMathNoInfs | LLVMFastMathNoSignedZeros |
                      LLVMFastMathAllowReciprocal | LLVMFastMathAllowContract |
                      LLVMFastMathApproxFunc,
};
|
||||
|
||||
/// Converts the C-level flag bit set into an llvm::FastMathFlags value,
/// setting each property exactly when its bit is present in FMF.
static FastMathFlags mapFromLLVMFastMathFlags(LLVMFastMathFlags FMF)
{
    const auto isSet = [FMF](unsigned Bit) { return (FMF & Bit) != 0; };

    FastMathFlags Result;
    Result.setAllowReassoc(isSet(LLVMFastMathAllowReassoc));
    Result.setNoNaNs(isSet(LLVMFastMathNoNaNs));
    Result.setNoInfs(isSet(LLVMFastMathNoInfs));
    Result.setNoSignedZeros(isSet(LLVMFastMathNoSignedZeros));
    Result.setAllowReciprocal(isSet(LLVMFastMathAllowReciprocal));
    Result.setAllowContract(isSet(LLVMFastMathAllowContract));
    Result.setApproxFunc(isSet(LLVMFastMathApproxFunc));
    return Result;
}
|
||||
|
||||
LLVM_C_EXTERN_C_BEGIN
|
||||
|
||||
/// Emits an `alloca` of type Ty in address space AddrSpace, with no array
/// size operand, and returns the new instruction.
LLVMValueRef LLVMZludaBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, unsigned AddrSpace,
                                  const char *Name)
{
    auto builder = llvm::unwrap(B);
    auto slot = builder->CreateAlloca(llvm::unwrap(Ty), AddrSpace, nullptr, Name);
    return llvm::wrap(slot);
}
|
||||
|
||||
/// Emits an `atomicrmw` instruction with a named synchronization scope.
///
/// `scope` is looked up (or registered) as a sync scope name in the builder's
/// context. No explicit alignment is supplied.
LLVMValueRef LLVMZludaBuildAtomicRMW(LLVMBuilderRef B, LLVMZludaAtomicRMWBinOp op,
                                     LLVMValueRef PTR, LLVMValueRef Val,
                                     char *scope,
                                     LLVMAtomicOrdering ordering)
{
    auto builder = llvm::unwrap(B);
    LLVMContext &context = builder->getContext();
    auto syncScope = context.getOrInsertSyncScopeID(scope);
    auto rmw = builder->CreateAtomicRMW(
        mapFromLLVMRMWBinOp(op), llvm::unwrap(PTR), llvm::unwrap(Val),
        llvm::MaybeAlign(), mapFromLLVMOrdering(ordering), syncScope);
    return llvm::wrap(rmw);
}
|
||||
|
||||
/// Emits a `cmpxchg` instruction with a named synchronization scope.
///
/// `scope` is looked up (or registered) as a sync scope name in the builder's
/// context. No explicit alignment is supplied.
LLVMValueRef LLVMZludaBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr,
                                         LLVMValueRef Cmp, LLVMValueRef New,
                                         char *scope,
                                         LLVMAtomicOrdering SuccessOrdering,
                                         LLVMAtomicOrdering FailureOrdering)
{
    auto builder = llvm::unwrap(B);
    LLVMContext &context = builder->getContext();
    auto syncScope = context.getOrInsertSyncScopeID(scope);
    auto cmpXchg = builder->CreateAtomicCmpXchg(
        unwrap(Ptr), unwrap(Cmp), unwrap(New), MaybeAlign(),
        mapFromLLVMOrdering(SuccessOrdering),
        mapFromLLVMOrdering(FailureOrdering),
        syncScope);
    return wrap(cmpXchg);
}
|
||||
|
||||
/// Replaces the fast-math flags of an instruction with FMF.
/// FPMathInst must be an llvm::Instruction; the cast is unconditional.
void LLVMZludaSetFastMathFlags(LLVMValueRef FPMathInst, LLVMFastMathFlags FMF)
{
    auto *Inst = cast<Instruction>(unwrap<Value>(FPMathInst));
    Inst->setFastMathFlags(mapFromLLVMFastMathFlags(FMF));
}
|
||||
|
||||
/// Emits a `fence` instruction with the given ordering and a named
/// synchronization scope. The created instruction is not returned.
void LLVMZludaBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering,
                         char *scope, const char *Name)
{
    auto builder = llvm::unwrap(B);
    LLVMContext &context = builder->getContext();
    auto syncScope = context.getOrInsertSyncScopeID(scope);
    builder->CreateFence(mapFromLLVMOrdering(Ordering), syncScope, Name);
}
|
||||
|
||||
LLVM_C_EXTERN_C_END
|
81
llvm_zluda/src/lib.rs
Normal file
81
llvm_zluda/src/lib.rs
Normal file
@ -0,0 +1,81 @@
|
||||
#![allow(non_upper_case_globals)]
|
||||
use llvm_sys::prelude::*;
|
||||
pub use llvm_sys::*;
|
||||
|
||||
/// Binary operation selector passed to `LLVMZludaBuildAtomicRMW`.
///
/// The enum is `#[repr(C)]` and every discriminant is spelled out because the
/// value crosses the FFI boundary as a plain C enum.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so the (field-less)
/// enum can be used as a map/set key and in `match`-like equality contexts.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LLVMZludaAtomicRMWBinOp {
    LLVMZludaAtomicRMWBinOpXchg = 0,
    LLVMZludaAtomicRMWBinOpAdd = 1,
    LLVMZludaAtomicRMWBinOpSub = 2,
    LLVMZludaAtomicRMWBinOpAnd = 3,
    LLVMZludaAtomicRMWBinOpNand = 4,
    LLVMZludaAtomicRMWBinOpOr = 5,
    LLVMZludaAtomicRMWBinOpXor = 6,
    LLVMZludaAtomicRMWBinOpMax = 7,
    LLVMZludaAtomicRMWBinOpMin = 8,
    LLVMZludaAtomicRMWBinOpUMax = 9,
    LLVMZludaAtomicRMWBinOpUMin = 10,
    LLVMZludaAtomicRMWBinOpFAdd = 11,
    LLVMZludaAtomicRMWBinOpFSub = 12,
    LLVMZludaAtomicRMWBinOpFMax = 13,
    LLVMZludaAtomicRMWBinOpFMin = 14,
    LLVMZludaAtomicRMWBinOpUIncWrap = 15,
    LLVMZludaAtomicRMWBinOpUDecWrap = 16,
}
|
||||
|
||||
// Backport from LLVM 19

/// Bit set of fast-math flags accepted by `LLVMZludaSetFastMathFlags`.
pub type LLVMZludaFastMathFlags = std::ffi::c_uint;

/// No fast-math flag set.
pub const LLVMZludaFastMathNone: LLVMZludaFastMathFlags = 0;
/// Bit 0.
pub const LLVMZludaFastMathAllowReassoc: LLVMZludaFastMathFlags = 1 << 0;
/// Bit 1.
pub const LLVMZludaFastMathNoNaNs: LLVMZludaFastMathFlags = 1 << 1;
/// Bit 2.
pub const LLVMZludaFastMathNoInfs: LLVMZludaFastMathFlags = 1 << 2;
/// Bit 3.
pub const LLVMZludaFastMathNoSignedZeros: LLVMZludaFastMathFlags = 1 << 3;
/// Bit 4.
pub const LLVMZludaFastMathAllowReciprocal: LLVMZludaFastMathFlags = 1 << 4;
/// Bit 5.
pub const LLVMZludaFastMathAllowContract: LLVMZludaFastMathFlags = 1 << 5;
/// Bit 6.
pub const LLVMZludaFastMathApproxFunc: LLVMZludaFastMathFlags = 1 << 6;
/// Union of all seven individual flags.
pub const LLVMZludaFastMathAll: LLVMZludaFastMathFlags = LLVMZludaFastMathAllowReassoc
    | LLVMZludaFastMathNoNaNs
    | LLVMZludaFastMathNoInfs
    | LLVMZludaFastMathNoSignedZeros
    | LLVMZludaFastMathAllowReciprocal
    | LLVMZludaFastMathAllowContract
    | LLVMZludaFastMathApproxFunc;
|
||||
|
||||
extern "C" {
|
||||
pub fn LLVMZludaBuildAlloca(
|
||||
B: LLVMBuilderRef,
|
||||
Ty: LLVMTypeRef,
|
||||
AddrSpace: u32,
|
||||
Name: *const i8,
|
||||
) -> LLVMValueRef;
|
||||
|
||||
pub fn LLVMZludaBuildAtomicRMW(
|
||||
B: LLVMBuilderRef,
|
||||
op: LLVMZludaAtomicRMWBinOp,
|
||||
PTR: LLVMValueRef,
|
||||
Val: LLVMValueRef,
|
||||
scope: *const i8,
|
||||
ordering: LLVMAtomicOrdering,
|
||||
) -> LLVMValueRef;
|
||||
|
||||
pub fn LLVMZludaBuildAtomicCmpXchg(
|
||||
B: LLVMBuilderRef,
|
||||
Ptr: LLVMValueRef,
|
||||
Cmp: LLVMValueRef,
|
||||
New: LLVMValueRef,
|
||||
scope: *const i8,
|
||||
SuccessOrdering: LLVMAtomicOrdering,
|
||||
FailureOrdering: LLVMAtomicOrdering,
|
||||
) -> LLVMValueRef;
|
||||
|
||||
pub fn LLVMZludaSetFastMathFlags(FPMathInst: LLVMValueRef, FMF: LLVMZludaFastMathFlags);
|
||||
|
||||
pub fn LLVMZludaBuildFence(
|
||||
B: LLVMBuilderRef,
|
||||
ordering: LLVMAtomicOrdering,
|
||||
scope: *const i8,
|
||||
Name: *const i8,
|
||||
) -> LLVMValueRef;
|
||||
}
|
@ -2,26 +2,30 @@
|
||||
name = "ptx"
|
||||
version = "0.0.0"
|
||||
authors = ["Andrzej Janik <vosen@vosen.pl>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
lalrpop-util = "0.19"
|
||||
regex = "1"
|
||||
rspirv = "0.6"
|
||||
spirv_headers = "~1.4.2"
|
||||
ptx_parser = { path = "../ptx_parser" }
|
||||
llvm_zluda = { path = "../llvm_zluda" }
|
||||
quick-error = "1.2"
|
||||
thiserror = "1.0"
|
||||
bit-vec = "0.6"
|
||||
half ="1.6"
|
||||
bitflags = "1.2"
|
||||
|
||||
[build-dependencies.lalrpop]
|
||||
version = "0.19"
|
||||
features = ["lexer"]
|
||||
rustc-hash = "2.0.0"
|
||||
strum = "0.26"
|
||||
strum_macros = "0.26"
|
||||
petgraph = "0.7.1"
|
||||
microlp = "0.2.10"
|
||||
int-enum = "1.1"
|
||||
unwrap_or = "1.0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
level_zero-sys = { path = "../level_zero-sys" }
|
||||
level_zero = { path = "../level_zero" }
|
||||
spirv_tools-sys = { path = "../spirv_tools-sys" }
|
||||
hip_runtime-sys = { path = "../ext/hip_runtime-sys" }
|
||||
comgr = { path = "../comgr" }
|
||||
tempfile = "3"
|
||||
paste = "1.0"
|
||||
cuda-driver-sys = "0.3.0"
|
||||
pretty_assertions = "1.4.1"
|
@ -1,5 +0,0 @@
|
||||
extern crate lalrpop;
|
||||
|
||||
fn main() {
|
||||
lalrpop::process_root().unwrap();
|
||||
}
|
BIN
ptx/lib/zluda_ptx_impl.bc
Normal file
BIN
ptx/lib/zluda_ptx_impl.bc
Normal file
Binary file not shown.
@ -1,146 +0,0 @@
|
||||
// Every time this file changes it must be rebuilt:
|
||||
// ocloc -file zluda_ptx_impl.cl -64 -options "-cl-std=CL2.0 -Dcl_intel_bit_instructions" -out_dir . -device kbl -output_no_suffix -spv_only
|
||||
// Additionally you should strip names:
|
||||
// spirv-opt --strip-debug zluda_ptx_impl.spv -o zluda_ptx_impl.spv
|
||||
|
||||
#define FUNC(NAME) __zluda_ptx_impl__ ## NAME
|
||||
|
||||
#define atomic_inc(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \
|
||||
uint FUNC(NAME)(SPACE uint* ptr, uint threshold) { \
|
||||
uint expected = *ptr; \
|
||||
uint desired; \
|
||||
do { \
|
||||
desired = (expected >= threshold) ? 0 : expected + 1; \
|
||||
} while (!atomic_compare_exchange_strong_explicit((volatile SPACE atomic_uint*)ptr, &expected, desired, SUCCESS, FAILURE, SCOPE)); \
|
||||
return expected; \
|
||||
}
|
||||
|
||||
#define atomic_dec(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \
|
||||
uint FUNC(NAME)(SPACE uint* ptr, uint threshold) { \
|
||||
uint expected = *ptr; \
|
||||
uint desired; \
|
||||
do { \
|
||||
desired = (expected == 0 || expected > threshold) ? threshold : expected - 1; \
|
||||
} while (!atomic_compare_exchange_strong_explicit((volatile SPACE atomic_uint*)ptr, &expected, desired, SUCCESS, FAILURE, SCOPE)); \
|
||||
return expected; \
|
||||
}
|
||||
|
||||
// We are doing all this mess instead of accepting memory_order and memory_scope parameters
|
||||
// because ocloc emits broken (failing spirv-dis) SPIR-V when memory_order or memory_scope is a parameter
|
||||
|
||||
// atom.inc
|
||||
atomic_inc(atom_relaxed_cta_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, );
|
||||
atomic_inc(atom_acquire_cta_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, );
|
||||
atomic_inc(atom_release_cta_generic_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, );
|
||||
atomic_inc(atom_acq_rel_cta_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, );
|
||||
|
||||
atomic_inc(atom_relaxed_gpu_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||
atomic_inc(atom_acquire_gpu_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||
atomic_inc(atom_release_gpu_generic_inc, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||
atomic_inc(atom_acq_rel_gpu_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||
|
||||
atomic_inc(atom_relaxed_sys_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||
atomic_inc(atom_acquire_sys_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||
atomic_inc(atom_release_sys_generic_inc, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||
atomic_inc(atom_acq_rel_sys_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||
|
||||
atomic_inc(atom_relaxed_cta_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global);
|
||||
atomic_inc(atom_acquire_cta_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global);
|
||||
atomic_inc(atom_release_cta_global_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, __global);
|
||||
atomic_inc(atom_acq_rel_cta_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global);
|
||||
|
||||
atomic_inc(atom_relaxed_gpu_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||
atomic_inc(atom_acquire_gpu_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_inc(atom_release_gpu_global_inc, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_inc(atom_acq_rel_gpu_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||
|
||||
atomic_inc(atom_relaxed_sys_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||
atomic_inc(atom_acquire_sys_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_inc(atom_release_sys_global_inc, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_inc(atom_acq_rel_sys_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||
|
||||
atomic_inc(atom_relaxed_cta_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local);
|
||||
atomic_inc(atom_acquire_cta_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local);
|
||||
atomic_inc(atom_release_cta_shared_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, __local);
|
||||
atomic_inc(atom_acq_rel_cta_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local);
|
||||
|
||||
atomic_inc(atom_relaxed_gpu_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||
atomic_inc(atom_acquire_gpu_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_inc(atom_release_gpu_shared_inc, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_inc(atom_acq_rel_gpu_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||
|
||||
atomic_inc(atom_relaxed_sys_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||
atomic_inc(atom_acquire_sys_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_inc(atom_release_sys_shared_inc, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_inc(atom_acq_rel_sys_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||
|
||||
// atom.dec
|
||||
atomic_dec(atom_relaxed_cta_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, );
|
||||
atomic_dec(atom_acquire_cta_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, );
|
||||
atomic_dec(atom_release_cta_generic_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, );
|
||||
atomic_dec(atom_acq_rel_cta_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, );
|
||||
|
||||
atomic_dec(atom_relaxed_gpu_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||
atomic_dec(atom_acquire_gpu_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||
atomic_dec(atom_release_gpu_generic_dec, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||
atomic_dec(atom_acq_rel_gpu_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||
|
||||
atomic_dec(atom_relaxed_sys_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||
atomic_dec(atom_acquire_sys_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||
atomic_dec(atom_release_sys_generic_dec, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||
atomic_dec(atom_acq_rel_sys_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||
|
||||
atomic_dec(atom_relaxed_cta_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global);
|
||||
atomic_dec(atom_acquire_cta_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global);
|
||||
atomic_dec(atom_release_cta_global_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, __global);
|
||||
atomic_dec(atom_acq_rel_cta_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global);
|
||||
|
||||
atomic_dec(atom_relaxed_gpu_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||
atomic_dec(atom_acquire_gpu_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_dec(atom_release_gpu_global_dec, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_dec(atom_acq_rel_gpu_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||
|
||||
atomic_dec(atom_relaxed_sys_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||
atomic_dec(atom_acquire_sys_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_dec(atom_release_sys_global_dec, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||
atomic_dec(atom_acq_rel_sys_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||
|
||||
atomic_dec(atom_relaxed_cta_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local);
|
||||
atomic_dec(atom_acquire_cta_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local);
|
||||
atomic_dec(atom_release_cta_shared_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, __local);
|
||||
atomic_dec(atom_acq_rel_cta_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local);
|
||||
|
||||
atomic_dec(atom_relaxed_gpu_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||
atomic_dec(atom_acquire_gpu_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_dec(atom_release_gpu_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_dec(atom_acq_rel_gpu_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||
|
||||
atomic_dec(atom_relaxed_sys_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||
atomic_dec(atom_acquire_sys_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_dec(atom_release_sys_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||
atomic_dec(atom_acq_rel_sys_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||
|
||||
uint FUNC(bfe_u32)(uint base, uint pos, uint len) {
|
||||
return intel_ubfe(base, pos, len);
|
||||
}
|
||||
|
||||
ulong FUNC(bfe_u64)(ulong base, uint pos, uint len) {
|
||||
return intel_ubfe(base, pos, len);
|
||||
}
|
||||
|
||||
int FUNC(bfe_s32)(int base, uint pos, uint len) {
|
||||
return intel_sbfe(base, pos, len);
|
||||
}
|
||||
|
||||
long FUNC(bfe_s64)(long base, uint pos, uint len) {
|
||||
return intel_sbfe(base, pos, len);
|
||||
}
|
||||
|
||||
void FUNC(__assertfail)(
|
||||
__private ulong* message,
|
||||
__private ulong* file,
|
||||
__private uint* line,
|
||||
__private ulong* function,
|
||||
__private ulong* charSize
|
||||
) {
|
||||
}
|
169
ptx/lib/zluda_ptx_impl.cpp
Normal file
169
ptx/lib/zluda_ptx_impl.cpp
Normal file
@ -0,0 +1,169 @@
|
||||
// Every time this file changes it must be rebuilt; you need `rocm-llvm-dev` and `llvm-17`
|
||||
// `fdenormal-fp-math=dynamic` is required to make functions eligible for inlining
|
||||
// /opt/rocm/llvm/bin/clang -Xclang -fdenormal-fp-math=dynamic -Wall -Wextra -Wsign-compare -Wconversion -x hip zluda_ptx_impl.cpp -nogpulib -O3 -mno-wavefrontsize64 -o zluda_ptx_impl.bc -emit-llvm -c --offload-device-only --offload-arch=gfx1010 && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc -o - | sed '/@llvm.used/d' | sed '/wchar_size/d' | sed '/llvm.module.flags/d' | sed 's/define hidden/define linkonce_odr/g' | sed 's/\"target-cpu\"=\"gfx1010\"//g' | sed -E 's/\"target-features\"=\"[^\"]+\"//g' | sed 's/ nneg / /g' | sed 's/ disjoint / /g' | llvm-as-17 - -o zluda_ptx_impl.bc && /opt/rocm/llvm/bin/llvm-dis zluda_ptx_impl.bc
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <hip/amd_detail/amd_device_functions.h>
|
||||
|
||||
#define FUNC(NAME) __device__ __attribute__((retain)) __zluda_ptx_impl_##NAME
|
||||
|
||||
extern "C"
|
||||
{
|
||||
// Implementation of the PTX `activemask` helper: returns the value of
// `__builtin_amdgcn_read_exec_lo()`.
uint32_t FUNC(activemask)()
{
    const uint32_t exec_lo = __builtin_amdgcn_read_exec_lo();
    return exec_lo;
}
|
||||
|
||||
size_t __ockl_get_local_id(uint32_t) __device__;
|
||||
uint32_t FUNC(sreg_tid)(uint8_t member)
|
||||
{
|
||||
return (uint32_t)__ockl_get_local_id(member);
|
||||
}
|
||||
|
||||
size_t __ockl_get_local_size(uint32_t) __device__;
|
||||
uint32_t FUNC(sreg_ntid)(uint8_t member)
|
||||
{
|
||||
return (uint32_t)__ockl_get_local_size(member);
|
||||
}
|
||||
|
||||
size_t __ockl_get_group_id(uint32_t) __device__;
|
||||
uint32_t FUNC(sreg_ctaid)(uint8_t member)
|
||||
{
|
||||
return (uint32_t)__ockl_get_group_id(member);
|
||||
}
|
||||
|
||||
size_t __ockl_get_num_groups(uint32_t) __device__;
|
||||
uint32_t FUNC(sreg_nctaid)(uint8_t member)
|
||||
{
|
||||
return (uint32_t)__ockl_get_num_groups(member);
|
||||
}
|
||||
|
||||
uint32_t __ockl_bfe_u32(uint32_t, uint32_t, uint32_t) __device__;
|
||||
uint32_t FUNC(bfe_u32)(uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
uint32_t len = len_32 & 0xFFU;
|
||||
if (pos >= 32)
|
||||
return 0;
|
||||
// V_BFE_U32 only uses bits [4:0] for len (max value is 31)
|
||||
if (len >= 32)
|
||||
return base >> pos;
|
||||
len = std::min(len, 31U);
|
||||
return __ockl_bfe_u32(base, pos, len);
|
||||
}
|
||||
|
||||
// LLVM contains mentions of llvm.amdgcn.ubfe.i64 and llvm.amdgcn.sbfe.i64,
|
||||
// but using it only leads to LLVM crashes on RDNA2
|
||||
uint64_t FUNC(bfe_u64)(uint64_t base, uint32_t pos, uint32_t len)
|
||||
{
|
||||
// NVIDIA docs are incorrect. In 64 bit `bfe` both `pos` and `len`
|
||||
// parameters use whole 32 bit number and not just bottom 8 bits
|
||||
if (pos >= 64)
|
||||
return 0;
|
||||
if (len >= 64)
|
||||
return base >> pos;
|
||||
len = std::min(len, 63U);
|
||||
return (base >> pos) & ((1UL << len) - 1UL);
|
||||
}
|
||||
|
||||
int32_t __ockl_bfe_i32(int32_t, uint32_t, uint32_t) __device__;
|
||||
int32_t FUNC(bfe_s32)(int32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
uint32_t len = len_32 & 0xFFU;
|
||||
if (len == 0)
|
||||
return 0;
|
||||
if (pos >= 32)
|
||||
return (base >> 31);
|
||||
// V_BFE_I32 only uses bits [4:0] for len (max value is 31)
|
||||
if (len >= 32)
|
||||
return base >> pos;
|
||||
len = std::min(len, 31U);
|
||||
return __ockl_bfe_i32(base, pos, len);
|
||||
}
|
||||
|
||||
// Unsigned 32-bit addition that clamps to UINT32_MAX on overflow.
static __device__ uint32_t add_sat(uint32_t x, uint32_t y)
{
    uint32_t sum;
    const bool overflowed = __builtin_add_overflow(x, y, &sum);
    return overflowed ? UINT32_MAX : sum;
}
|
||||
|
||||
// Unsigned 32-bit subtraction that clamps to 0 on underflow.
static __device__ uint32_t sub_sat(uint32_t x, uint32_t y)
{
    uint32_t difference;
    const bool underflowed = __builtin_sub_overflow(x, y, &difference);
    return underflowed ? 0 : difference;
}
|
||||
|
||||
// Signed 64-bit bit-field extract, implemented with a shift-left /
// arithmetic-shift-right pair.
//
// A zero length yields 0; a start position at or beyond bit 64 yields the
// sign of `base` replicated to all bits. A field that would run past bit 63
// is shortened so that it ends at bit 63.
int64_t FUNC(bfe_s64)(int64_t base, uint32_t pos, uint32_t len)
{
    // NVIDIA docs are incorrect. In 64 bit `bfe` both `pos` and `len`
    // parameters use whole 32 bit number and not just bottom 8 bits
    if (len == 0)
        return 0;
    if (pos >= 64)
        return (base >> 63U);
    // Saturating arithmetic keeps huge `len` values from wrapping around.
    const bool runs_past_msb = add_sat(pos, len) >= 64;
    const uint32_t width = runs_past_msb ? sub_sat(64, pos) : len;
    // Move the field's top bit to bit 63, then shift back down so that the
    // field's sign bit is replicated into the upper bits.
    const uint32_t shift_up = 64U - pos - width;
    const uint32_t shift_down = 64U - width;
    return (base << shift_up) >> shift_down;
}
|
||||
|
||||
uint32_t __ockl_bfm_u32(uint32_t count, uint32_t offset) __device__;
|
||||
uint32_t FUNC(bfi_b32)(uint32_t insert, uint32_t base, uint32_t pos_32, uint32_t len_32)
|
||||
{
|
||||
uint32_t pos = pos_32 & 0xFFU;
|
||||
uint32_t len = len_32 & 0xFFU;
|
||||
if (pos >= 32)
|
||||
return base;
|
||||
uint32_t mask;
|
||||
if (len >= 32)
|
||||
mask = UINT32_MAX << pos;
|
||||
else
|
||||
mask = __ockl_bfm_u32(len, pos);
|
||||
return (~mask & base) | (mask & (insert << pos));
|
||||
}
|
||||
|
||||
// 64-bit bit-field insert: returns `base` with the field described by
// `pos`/`len` replaced by the low bits of `insert`.
//
// A start position at or beyond bit 64 leaves `base` untouched; a length of
// 64 or more covers everything from `pos` upwards.
uint64_t FUNC(bfi_b64)(uint64_t insert, uint64_t base, uint32_t pos, uint32_t len)
{
    // NVIDIA docs are incorrect. In 64 bit `bfi` both `pos` and `len`
    // parameters use whole 32 bit number and not just bottom 8 bits
    if (pos >= 64)
        return base;
    uint64_t mask;
    if (len >= 64)
        mask = UINT64_MAX << pos;
    else
        // len <= 63 here, so the shift is in range. The literal is spelled
        // as uint64_t so the mask is 64 bits wide regardless of the width
        // of `unsigned long`.
        mask = ((uint64_t{1} << len) - uint64_t{1}) << (pos);
    return (~mask & base) | (mask & (insert << pos));
}
|
||||
|
||||
// `bar.sync` helper: issues a sequentially consistent fence at "workgroup"
// scope and then executes `s_barrier`. The argument is accepted but not used.
void FUNC(bar_sync)(uint32_t /* unused */)
{
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
    __builtin_amdgcn_s_barrier();
}
|
||||
|
||||
// `__assertfail` helper: forwards to `__assert_fail`.
//
// `message`, `file` and `function` arrive as 64-bit integers and are
// reinterpreted as C string pointers; `char_size` is ignored.
void FUNC(__assertfail)(uint64_t message,
                        uint64_t file,
                        uint32_t line,
                        uint64_t function,
                        uint64_t char_size)
{
    static_cast<void>(char_size);
    const char *message_text = (const char *)message;
    const char *file_name = (const char *)file;
    const char *function_name = (const char *)function;
    __assert_fail(message_text, file_name, line, function_name);
}
|
||||
}
|
Binary file not shown.
1406
ptx/src/ast.rs
1406
ptx/src/ast.rs
File diff suppressed because it is too large
Load Diff
@ -1,57 +1,6 @@
|
||||
#[cfg(test)]
|
||||
extern crate paste;
|
||||
#[macro_use]
|
||||
extern crate lalrpop_util;
|
||||
#[macro_use]
|
||||
extern crate quick_error;
|
||||
|
||||
extern crate bit_vec;
|
||||
extern crate half;
|
||||
#[cfg(test)]
|
||||
extern crate level_zero as ze;
|
||||
#[cfg(test)]
|
||||
extern crate level_zero_sys as l0;
|
||||
extern crate rspirv;
|
||||
extern crate spirv_headers as spirv;
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate spirv_tools_sys as spirv_tools;
|
||||
|
||||
#[macro_use]
|
||||
extern crate bitflags;
|
||||
|
||||
lalrpop_mod!(
|
||||
#[allow(warnings)]
|
||||
ptx
|
||||
);
|
||||
|
||||
pub mod ast;
|
||||
pub(crate) mod pass;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
mod translate;
|
||||
|
||||
pub use crate::ptx::ModuleParser;
|
||||
pub use lalrpop_util::lexer::Token;
|
||||
pub use lalrpop_util::ParseError;
|
||||
pub use rspirv::dr::Error as SpirvError;
|
||||
pub use translate::to_spirv_module;
|
||||
pub use translate::KernelInfo;
|
||||
pub use translate::TranslateError;
|
||||
pub use pass::to_llvm_module;
|
||||
|
||||
pub(crate) fn without_none<T>(x: Vec<Option<T>>) -> Vec<T> {
|
||||
x.into_iter().filter_map(|x| x).collect()
|
||||
}
|
||||
|
||||
pub(crate) fn vector_index<'input>(
|
||||
inp: &'input str,
|
||||
) -> Result<u8, ParseError<usize, lalrpop_util::lexer::Token<'input>, ast::PtxError>> {
|
||||
match inp {
|
||||
"x" | "r" => Ok(0),
|
||||
"y" | "g" => Ok(1),
|
||||
"z" | "b" => Ok(2),
|
||||
"w" | "a" => Ok(3),
|
||||
_ => Err(ParseError::User {
|
||||
error: ast::PtxError::WrongVectorElement,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
191
ptx/src/pass/deparamize_functions.rs
Normal file
191
ptx/src/pass/deparamize_functions.rs
Normal file
@ -0,0 +1,191 @@
|
||||
use super::*;
|
||||
|
||||
pub(super) fn run<'a, 'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, directive))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_directive<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2,
|
||||
directive: Directive2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<Directive2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
Ok(match directive {
|
||||
var @ Directive2::Variable(..) => var,
|
||||
Directive2::Method(method) => Directive2::Method(run_method(resolver, method)?),
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2,
|
||||
mut method: Function2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<Function2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
let is_declaration = method.body.is_none();
|
||||
let mut body = Vec::new();
|
||||
let mut remap_returns = Vec::new();
|
||||
if !method.is_kernel {
|
||||
for arg in method.return_arguments.iter_mut() {
|
||||
match arg.state_space {
|
||||
ptx_parser::StateSpace::Param => {
|
||||
arg.state_space = ptx_parser::StateSpace::Reg;
|
||||
let old_name = arg.name;
|
||||
arg.name =
|
||||
resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space)));
|
||||
if is_declaration {
|
||||
continue;
|
||||
}
|
||||
remap_returns.push((old_name, arg.name, arg.v_type.clone()));
|
||||
body.push(Statement::Variable(ast::Variable {
|
||||
align: None,
|
||||
name: old_name,
|
||||
v_type: arg.v_type.clone(),
|
||||
state_space: ptx_parser::StateSpace::Param,
|
||||
array_init: Vec::new(),
|
||||
}));
|
||||
}
|
||||
ptx_parser::StateSpace::Reg => {}
|
||||
_ => return Err(error_unreachable()),
|
||||
}
|
||||
}
|
||||
for arg in method.input_arguments.iter_mut() {
|
||||
match arg.state_space {
|
||||
ptx_parser::StateSpace::Param => {
|
||||
arg.state_space = ptx_parser::StateSpace::Reg;
|
||||
let old_name = arg.name;
|
||||
arg.name =
|
||||
resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space)));
|
||||
if is_declaration {
|
||||
continue;
|
||||
}
|
||||
body.push(Statement::Variable(ast::Variable {
|
||||
align: None,
|
||||
name: old_name,
|
||||
v_type: arg.v_type.clone(),
|
||||
state_space: ptx_parser::StateSpace::Param,
|
||||
array_init: Vec::new(),
|
||||
}));
|
||||
body.push(Statement::Instruction(ast::Instruction::St {
|
||||
data: ast::StData {
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
state_space: ast::StateSpace::Param,
|
||||
caching: ast::StCacheOperator::Writethrough,
|
||||
typ: arg.v_type.clone(),
|
||||
},
|
||||
arguments: ast::StArgs {
|
||||
src1: old_name,
|
||||
src2: arg.name,
|
||||
},
|
||||
}));
|
||||
}
|
||||
ptx_parser::StateSpace::Reg => {}
|
||||
_ => return Err(error_unreachable()),
|
||||
}
|
||||
}
|
||||
}
|
||||
let body = method
|
||||
.body
|
||||
.map(|statements| {
|
||||
for statement in statements {
|
||||
run_statement(resolver, &remap_returns, &mut body, statement)?;
|
||||
}
|
||||
Ok::<_, TranslateError>(body)
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Function2 { body, ..method })
|
||||
}
|
||||
|
||||
fn run_statement<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
remap_returns: &Vec<(SpirvWord, SpirvWord, ast::Type)>,
|
||||
result: &mut Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
statement: Statement<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<(), TranslateError> {
|
||||
match statement {
|
||||
Statement::Instruction(ast::Instruction::Call {
|
||||
mut data,
|
||||
mut arguments,
|
||||
}) => {
|
||||
let mut post_st = Vec::new();
|
||||
for ((type_, space), ident) in data
|
||||
.input_arguments
|
||||
.iter_mut()
|
||||
.zip(arguments.input_arguments.iter_mut())
|
||||
{
|
||||
if *space == ptx_parser::StateSpace::Param {
|
||||
*space = ptx_parser::StateSpace::Reg;
|
||||
let old_name = *ident;
|
||||
*ident = resolver
|
||||
.register_unnamed(Some((type_.clone(), ptx_parser::StateSpace::Reg)));
|
||||
result.push(Statement::Instruction(ast::Instruction::Ld {
|
||||
data: ast::LdDetails {
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
state_space: ast::StateSpace::Param,
|
||||
caching: ast::LdCacheOperator::Cached,
|
||||
typ: type_.clone(),
|
||||
non_coherent: false,
|
||||
},
|
||||
arguments: ast::LdArgs {
|
||||
dst: *ident,
|
||||
src: old_name,
|
||||
},
|
||||
}));
|
||||
}
|
||||
}
|
||||
for ((type_, space), ident) in data
|
||||
.return_arguments
|
||||
.iter_mut()
|
||||
.zip(arguments.return_arguments.iter_mut())
|
||||
{
|
||||
if *space == ptx_parser::StateSpace::Param {
|
||||
*space = ptx_parser::StateSpace::Reg;
|
||||
let old_name = *ident;
|
||||
*ident = resolver
|
||||
.register_unnamed(Some((type_.clone(), ptx_parser::StateSpace::Reg)));
|
||||
post_st.push(Statement::Instruction(ast::Instruction::St {
|
||||
data: ast::StData {
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
state_space: ast::StateSpace::Param,
|
||||
caching: ast::StCacheOperator::Writethrough,
|
||||
typ: type_.clone(),
|
||||
},
|
||||
arguments: ast::StArgs {
|
||||
src1: old_name,
|
||||
src2: *ident,
|
||||
},
|
||||
}));
|
||||
}
|
||||
}
|
||||
result.push(Statement::Instruction(ast::Instruction::Call {
|
||||
data,
|
||||
arguments,
|
||||
}));
|
||||
result.extend(post_st.into_iter());
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Ret { data }) => {
|
||||
for (old_name, new_name, type_) in remap_returns.iter() {
|
||||
result.push(Statement::Instruction(ast::Instruction::Ld {
|
||||
data: ast::LdDetails {
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
state_space: ast::StateSpace::Param,
|
||||
caching: ast::LdCacheOperator::Cached,
|
||||
typ: type_.clone(),
|
||||
non_coherent: false,
|
||||
},
|
||||
arguments: ast::LdArgs {
|
||||
dst: *new_name,
|
||||
src: *old_name,
|
||||
},
|
||||
}));
|
||||
}
|
||||
result.push(Statement::Instruction(ast::Instruction::Ret { data }));
|
||||
}
|
||||
statement => {
|
||||
result.push(statement);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
2683
ptx/src/pass/emit_llvm.rs
Normal file
2683
ptx/src/pass/emit_llvm.rs
Normal file
File diff suppressed because it is too large
Load Diff
301
ptx/src/pass/expand_operands.rs
Normal file
301
ptx/src/pass/expand_operands.rs
Normal file
@ -0,0 +1,301 @@
|
||||
use super::*;
|
||||
|
||||
pub(super) fn run<'a, 'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<UnconditionalDirective>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, directive))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_directive<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directive: Directive2<
|
||||
ast::Instruction<ast::ParsedOperand<SpirvWord>>,
|
||||
ast::ParsedOperand<SpirvWord>,
|
||||
>,
|
||||
) -> Result<Directive2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
Ok(match directive {
|
||||
Directive2::Variable(linking, var) => Directive2::Variable(linking, var),
|
||||
Directive2::Method(method) => Directive2::Method(run_method(resolver, method)?),
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
method: Function2<
|
||||
ast::Instruction<ast::ParsedOperand<SpirvWord>>,
|
||||
ast::ParsedOperand<SpirvWord>,
|
||||
>,
|
||||
) -> Result<Function2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
let body = method
|
||||
.body
|
||||
.map(|statements| {
|
||||
let mut result = Vec::with_capacity(statements.len());
|
||||
for statement in statements {
|
||||
run_statement(resolver, &mut result, statement)?;
|
||||
}
|
||||
Ok::<_, TranslateError>(result)
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Function2 {
|
||||
body,
|
||||
return_arguments: method.return_arguments,
|
||||
name: method.name,
|
||||
input_arguments: method.input_arguments,
|
||||
import_as: method.import_as,
|
||||
tuning: method.tuning,
|
||||
linkage: method.linkage,
|
||||
is_kernel: method.is_kernel,
|
||||
flush_to_zero_f32: method.flush_to_zero_f32,
|
||||
flush_to_zero_f16f64: method.flush_to_zero_f16f64,
|
||||
rounding_mode_f32: method.rounding_mode_f32,
|
||||
rounding_mode_f16f64: method.rounding_mode_f16f64,
|
||||
})
|
||||
}
|
||||
|
||||
fn run_statement<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
result: &mut Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
statement: UnconditionalStatement,
|
||||
) -> Result<(), TranslateError> {
|
||||
let mut visitor = FlattenArguments::new(resolver, result);
|
||||
let new_statement = statement.visit_map(&mut visitor)?;
|
||||
visitor.result.push(new_statement);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct FlattenArguments<'a, 'input> {
|
||||
result: &'a mut Vec<ExpandedStatement>,
|
||||
resolver: &'a mut GlobalStringIdentResolver2<'input>,
|
||||
post_stmts: Vec<ExpandedStatement>,
|
||||
}
|
||||
|
||||
impl<'a, 'input> FlattenArguments<'a, 'input> {
|
||||
fn new(
|
||||
resolver: &'a mut GlobalStringIdentResolver2<'input>,
|
||||
result: &'a mut Vec<ExpandedStatement>,
|
||||
) -> Self {
|
||||
FlattenArguments {
|
||||
result,
|
||||
resolver,
|
||||
post_stmts: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn reg(&mut self, name: SpirvWord) -> Result<SpirvWord, TranslateError> {
|
||||
Ok(name)
|
||||
}
|
||||
|
||||
fn reg_offset(
|
||||
&mut self,
|
||||
reg: SpirvWord,
|
||||
offset: i32,
|
||||
type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
_is_dst: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
let (type_, state_space) = if let Some((type_, state_space)) = type_space {
|
||||
(type_, state_space)
|
||||
} else {
|
||||
return Err(TranslateError::UntypedSymbol);
|
||||
};
|
||||
if state_space == ast::StateSpace::Reg {
|
||||
let (reg_type, reg_space) = self.resolver.get_typed(reg)?;
|
||||
if *reg_space != ast::StateSpace::Reg {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
let reg_scalar_type = match reg_type {
|
||||
ast::Type::Scalar(underlying_type) => *underlying_type,
|
||||
_ => return Err(error_mismatched_type()),
|
||||
};
|
||||
let reg_type = reg_type.clone();
|
||||
let id_constant_stmt = self
|
||||
.resolver
|
||||
.register_unnamed(Some((reg_type.clone(), ast::StateSpace::Reg)));
|
||||
self.result.push(Statement::Constant(ConstantDefinition {
|
||||
dst: id_constant_stmt,
|
||||
typ: reg_scalar_type,
|
||||
value: ast::ImmediateValue::S64(offset as i64),
|
||||
}));
|
||||
let arith_details = match reg_scalar_type.kind() {
|
||||
ast::ScalarKind::Signed => ast::ArithDetails::Integer(ast::ArithInteger {
|
||||
type_: reg_scalar_type,
|
||||
saturate: false,
|
||||
}),
|
||||
ast::ScalarKind::Unsigned | ast::ScalarKind::Bit => {
|
||||
ast::ArithDetails::Integer(ast::ArithInteger {
|
||||
type_: reg_scalar_type,
|
||||
saturate: false,
|
||||
})
|
||||
}
|
||||
_ => return Err(error_unreachable()),
|
||||
};
|
||||
let id_add_result = self
|
||||
.resolver
|
||||
.register_unnamed(Some((reg_type, state_space)));
|
||||
self.result
|
||||
.push(Statement::Instruction(ast::Instruction::Add {
|
||||
data: arith_details,
|
||||
arguments: ast::AddArgs {
|
||||
dst: id_add_result,
|
||||
src1: reg,
|
||||
src2: id_constant_stmt,
|
||||
},
|
||||
}));
|
||||
Ok(id_add_result)
|
||||
} else {
|
||||
let id_constant_stmt = self.resolver.register_unnamed(Some((
|
||||
ast::Type::Scalar(ast::ScalarType::S64),
|
||||
ast::StateSpace::Reg,
|
||||
)));
|
||||
self.result.push(Statement::Constant(ConstantDefinition {
|
||||
dst: id_constant_stmt,
|
||||
typ: ast::ScalarType::S64,
|
||||
value: ast::ImmediateValue::S64(offset as i64),
|
||||
}));
|
||||
let dst = self
|
||||
.resolver
|
||||
.register_unnamed(Some((type_.clone(), state_space)));
|
||||
self.result.push(Statement::PtrAccess(PtrAccess {
|
||||
underlying_type: type_.clone(),
|
||||
state_space: state_space,
|
||||
dst,
|
||||
ptr_src: reg,
|
||||
offset_src: id_constant_stmt,
|
||||
}));
|
||||
Ok(dst)
|
||||
}
|
||||
}
|
||||
|
||||
fn immediate(
|
||||
&mut self,
|
||||
value: ast::ImmediateValue,
|
||||
type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
let (scalar_t, state_space) =
|
||||
if let Some((ast::Type::Scalar(scalar), state_space)) = type_space {
|
||||
(*scalar, state_space)
|
||||
} else {
|
||||
return Err(TranslateError::UntypedSymbol);
|
||||
};
|
||||
let id = self
|
||||
.resolver
|
||||
.register_unnamed(Some((ast::Type::Scalar(scalar_t), state_space)));
|
||||
self.result.push(Statement::Constant(ConstantDefinition {
|
||||
dst: id,
|
||||
typ: scalar_t,
|
||||
value,
|
||||
}));
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn vec_member(
|
||||
&mut self,
|
||||
vector_ident: SpirvWord,
|
||||
member: u8,
|
||||
_type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
is_dst: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
let (vector_width, scalar_type, space) = match self.resolver.get_typed(vector_ident)? {
|
||||
(ast::Type::Vector(vector_width, scalar_t), space) => {
|
||||
(*vector_width, *scalar_t, *space)
|
||||
}
|
||||
_ => return Err(error_mismatched_type()),
|
||||
};
|
||||
let temporary = self
|
||||
.resolver
|
||||
.register_unnamed(Some((scalar_type.into(), space)));
|
||||
if is_dst {
|
||||
self.post_stmts.push(Statement::VectorWrite(VectorWrite {
|
||||
scalar_type,
|
||||
vector_width,
|
||||
vector_dst: vector_ident,
|
||||
vector_src: vector_ident,
|
||||
scalar_src: temporary,
|
||||
member,
|
||||
}));
|
||||
} else {
|
||||
self.result.push(Statement::VectorRead(VectorRead {
|
||||
scalar_type,
|
||||
vector_width,
|
||||
scalar_dst: temporary,
|
||||
vector_src: vector_ident,
|
||||
member,
|
||||
}));
|
||||
}
|
||||
Ok(temporary)
|
||||
}
|
||||
|
||||
fn vec_pack(
|
||||
&mut self,
|
||||
vector_elements: Vec<SpirvWord>,
|
||||
type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
is_dst: bool,
|
||||
relaxed_type_check: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
let (width, scalar_t, state_space) = match type_space {
|
||||
Some((ast::Type::Vector(width, scalar_t), space)) => (*width, *scalar_t, space),
|
||||
_ => return Err(error_mismatched_type()),
|
||||
};
|
||||
let temporary_vector = self
|
||||
.resolver
|
||||
.register_unnamed(Some((ast::Type::Vector(width, scalar_t), state_space)));
|
||||
let statement = Statement::RepackVector(RepackVectorDetails {
|
||||
is_extract: is_dst,
|
||||
typ: scalar_t,
|
||||
packed: temporary_vector,
|
||||
unpacked: vector_elements,
|
||||
relaxed_type_check,
|
||||
});
|
||||
if is_dst {
|
||||
self.post_stmts.push(statement);
|
||||
} else {
|
||||
self.result.push(statement);
|
||||
}
|
||||
Ok(temporary_vector)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b> ast::VisitorMap<ast::ParsedOperand<SpirvWord>, SpirvWord, TranslateError>
|
||||
for FlattenArguments<'a, 'b>
|
||||
{
|
||||
fn visit(
|
||||
&mut self,
|
||||
args: ast::ParsedOperand<SpirvWord>,
|
||||
type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
is_dst: bool,
|
||||
relaxed_type_check: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
match args {
|
||||
ast::ParsedOperand::Reg(r) => self.reg(r),
|
||||
ast::ParsedOperand::Imm(x) => self.immediate(x, type_space),
|
||||
ast::ParsedOperand::RegOffset(reg, offset) => {
|
||||
self.reg_offset(reg, offset, type_space, is_dst)
|
||||
}
|
||||
ast::ParsedOperand::VecMember(vec, member) => {
|
||||
self.vec_member(vec, member, type_space, is_dst)
|
||||
}
|
||||
ast::ParsedOperand::VecPack(vecs) => {
|
||||
self.vec_pack(vecs, type_space, is_dst, relaxed_type_check)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_ident(
|
||||
&mut self,
|
||||
name: SpirvWord,
|
||||
_type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
_is_dst: bool,
|
||||
_relaxed_type_check: bool,
|
||||
) -> Result<<SpirvWord as ast::Operand>::Ident, TranslateError> {
|
||||
self.reg(name)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for FlattenArguments<'_, '_> {
|
||||
fn drop(&mut self) {
|
||||
self.result.extend(self.post_stmts.drain(..));
|
||||
}
|
||||
}
|
208
ptx/src/pass/fix_special_registers2.rs
Normal file
208
ptx/src/pass/fix_special_registers2.rs
Normal file
@ -0,0 +1,208 @@
|
||||
use super::*;
|
||||
|
||||
pub(super) fn run<'a, 'input>(
|
||||
resolver: &'a mut GlobalStringIdentResolver2<'input>,
|
||||
special_registers: &'a SpecialRegistersMap2,
|
||||
directives: Vec<UnconditionalDirective>,
|
||||
) -> Result<Vec<UnconditionalDirective>, TranslateError> {
|
||||
let mut result = Vec::with_capacity(SpecialRegistersMap2::len() + directives.len());
|
||||
let mut sreg_to_function =
|
||||
FxHashMap::with_capacity_and_hasher(SpecialRegistersMap2::len(), Default::default());
|
||||
SpecialRegistersMap2::foreach_declaration(
|
||||
resolver,
|
||||
|sreg, (return_arguments, name, input_arguments)| {
|
||||
result.push(UnconditionalDirective::Method(UnconditionalFunction {
|
||||
return_arguments,
|
||||
name,
|
||||
input_arguments,
|
||||
body: None,
|
||||
import_as: None,
|
||||
tuning: Vec::new(),
|
||||
linkage: ast::LinkingDirective::EXTERN,
|
||||
is_kernel: false,
|
||||
flush_to_zero_f32: false,
|
||||
flush_to_zero_f16f64: false,
|
||||
rounding_mode_f32: ptx_parser::RoundingMode::NearestEven,
|
||||
rounding_mode_f16f64: ptx_parser::RoundingMode::NearestEven,
|
||||
}));
|
||||
sreg_to_function.insert(sreg, name);
|
||||
},
|
||||
);
|
||||
let mut visitor = SpecialRegisterResolver {
|
||||
resolver,
|
||||
special_registers,
|
||||
sreg_to_function,
|
||||
result: Vec::new(),
|
||||
};
|
||||
for directive in directives.into_iter() {
|
||||
result.push(run_directive(&mut visitor, directive)?);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn run_directive<'a, 'input>(
|
||||
visitor: &mut SpecialRegisterResolver<'a, 'input>,
|
||||
directive: UnconditionalDirective,
|
||||
) -> Result<UnconditionalDirective, TranslateError> {
|
||||
Ok(match directive {
|
||||
var @ Directive2::Variable(..) => var,
|
||||
Directive2::Method(method) => Directive2::Method(run_method(visitor, method)?),
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'a, 'input>(
|
||||
visitor: &mut SpecialRegisterResolver<'a, 'input>,
|
||||
method: UnconditionalFunction,
|
||||
) -> Result<UnconditionalFunction, TranslateError> {
|
||||
let body = method
|
||||
.body
|
||||
.map(|statements| {
|
||||
let mut result = Vec::with_capacity(statements.len());
|
||||
for statement in statements {
|
||||
run_statement(visitor, &mut result, statement)?;
|
||||
}
|
||||
Ok::<_, TranslateError>(result)
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Function2 { body, ..method })
|
||||
}
|
||||
|
||||
fn run_statement<'a, 'input>(
|
||||
visitor: &mut SpecialRegisterResolver<'a, 'input>,
|
||||
result: &mut Vec<UnconditionalStatement>,
|
||||
statement: UnconditionalStatement,
|
||||
) -> Result<(), TranslateError> {
|
||||
let converted_statement = statement.visit_map(visitor)?;
|
||||
result.extend(visitor.result.drain(..));
|
||||
result.push(converted_statement);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct SpecialRegisterResolver<'a, 'input> {
|
||||
resolver: &'a mut GlobalStringIdentResolver2<'input>,
|
||||
special_registers: &'a SpecialRegistersMap2,
|
||||
sreg_to_function: FxHashMap<PtxSpecialRegister, SpirvWord>,
|
||||
result: Vec<UnconditionalStatement>,
|
||||
}
|
||||
|
||||
impl<'a, 'b, 'input>
|
||||
ast::VisitorMap<ast::ParsedOperand<SpirvWord>, ast::ParsedOperand<SpirvWord>, TranslateError>
|
||||
for SpecialRegisterResolver<'a, 'input>
|
||||
{
|
||||
fn visit(
|
||||
&mut self,
|
||||
operand: ast::ParsedOperand<SpirvWord>,
|
||||
_type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
|
||||
is_dst: bool,
|
||||
_relaxed_type_check: bool,
|
||||
) -> Result<ast::ParsedOperand<SpirvWord>, TranslateError> {
|
||||
map_operand(operand, &mut |ident, vector_index| {
|
||||
self.replace_sreg(ident, vector_index, is_dst)
|
||||
})
|
||||
}
|
||||
|
||||
fn visit_ident(
|
||||
&mut self,
|
||||
args: SpirvWord,
|
||||
_type_space: Option<(&ptx_parser::Type, ptx_parser::StateSpace)>,
|
||||
is_dst: bool,
|
||||
_relaxed_type_check: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
Ok(self.replace_sreg(args, None, is_dst)?.unwrap_or(args))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b, 'input> SpecialRegisterResolver<'a, 'input> {
|
||||
fn replace_sreg(
|
||||
&mut self,
|
||||
name: SpirvWord,
|
||||
vector_index: Option<u8>,
|
||||
is_dst: bool,
|
||||
) -> Result<Option<SpirvWord>, TranslateError> {
|
||||
if let Some(sreg) = self.special_registers.get(name) {
|
||||
if is_dst {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
let input_arguments = match (vector_index, sreg.get_function_input_type()) {
|
||||
(Some(idx), Some(inp_type)) => {
|
||||
if inp_type != ast::ScalarType::U8 {
|
||||
return Err(TranslateError::Unreachable);
|
||||
}
|
||||
let constant = self.resolver.register_unnamed(Some((
|
||||
ast::Type::Scalar(inp_type),
|
||||
ast::StateSpace::Reg,
|
||||
)));
|
||||
self.result.push(Statement::Constant(ConstantDefinition {
|
||||
dst: constant,
|
||||
typ: inp_type,
|
||||
value: ast::ImmediateValue::U64(idx as u64),
|
||||
}));
|
||||
vec![(constant, ast::Type::Scalar(inp_type), ast::StateSpace::Reg)]
|
||||
}
|
||||
(None, None) => Vec::new(),
|
||||
_ => return Err(error_mismatched_type()),
|
||||
};
|
||||
let return_type = sreg.get_function_return_type();
|
||||
let fn_result = self
|
||||
.resolver
|
||||
.register_unnamed(Some((ast::Type::Scalar(return_type), ast::StateSpace::Reg)));
|
||||
let return_arguments = vec![(
|
||||
fn_result,
|
||||
ast::Type::Scalar(return_type),
|
||||
ast::StateSpace::Reg,
|
||||
)];
|
||||
let data = ast::CallDetails {
|
||||
uniform: false,
|
||||
return_arguments: return_arguments
|
||||
.iter()
|
||||
.map(|(_, typ, space)| (typ.clone(), *space))
|
||||
.collect(),
|
||||
input_arguments: input_arguments
|
||||
.iter()
|
||||
.map(|(_, typ, space)| (typ.clone(), *space))
|
||||
.collect(),
|
||||
};
|
||||
let arguments = ast::CallArgs::<ast::ParsedOperand<SpirvWord>> {
|
||||
return_arguments: return_arguments.iter().map(|(name, _, _)| *name).collect(),
|
||||
func: self.sreg_to_function[&sreg],
|
||||
input_arguments: input_arguments
|
||||
.iter()
|
||||
.map(|(name, _, _)| ast::ParsedOperand::Reg(*name))
|
||||
.collect(),
|
||||
};
|
||||
self.result
|
||||
.push(Statement::Instruction(ast::Instruction::Call {
|
||||
data,
|
||||
arguments,
|
||||
}));
|
||||
Ok(Some(fn_result))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies `fn_` to every identifier contained in `this`.
///
/// `fn_` receives the identifier and, for a vector-member operand only, the
/// member index. Returning `Ok(None)` keeps the identifier as it is;
/// `Ok(Some(new))` substitutes it. A substituted vector-member operand
/// collapses to a plain `Reg(new)`. Immediates are returned untouched. The
/// first error returned by `fn_` aborts the mapping.
pub fn map_operand<T: Copy, Err>(
    this: ast::ParsedOperand<T>,
    fn_: &mut impl FnMut(T, Option<u8>) -> Result<Option<T>, Err>,
) -> Result<ast::ParsedOperand<T>, Err> {
    let mapped = match this {
        ast::ParsedOperand::Imm(imm) => ast::ParsedOperand::Imm(imm),
        ast::ParsedOperand::Reg(ident) => {
            let replacement = fn_(ident, None)?;
            ast::ParsedOperand::Reg(replacement.unwrap_or(ident))
        }
        ast::ParsedOperand::RegOffset(ident, offset) => {
            let replacement = fn_(ident, None)?;
            ast::ParsedOperand::RegOffset(replacement.unwrap_or(ident), offset)
        }
        ast::ParsedOperand::VecMember(ident, member) => {
            if let Some(replacement) = fn_(ident, Some(member))? {
                ast::ParsedOperand::Reg(replacement)
            } else {
                ast::ParsedOperand::VecMember(ident, member)
            }
        }
        ast::ParsedOperand::VecPack(idents) => {
            let mut remapped = Vec::with_capacity(idents.len());
            for ident in idents {
                remapped.push(fn_(ident, None)?.unwrap_or(ident));
            }
            ast::ParsedOperand::VecPack(remapped)
        }
    };
    Ok(mapped)
}
|
45
ptx/src/pass/hoist_globals.rs
Normal file
45
ptx/src/pass/hoist_globals.rs
Normal file
@ -0,0 +1,45 @@
|
||||
use super::*;
|
||||
|
||||
pub(super) fn run<'input>(
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
let mut result = Vec::with_capacity(directives.len());
|
||||
for mut directive in directives.into_iter() {
|
||||
run_directive(&mut result, &mut directive)?;
|
||||
result.push(directive);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn run_directive<'input>(
|
||||
result: &mut Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
directive: &mut Directive2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<(), TranslateError> {
|
||||
match directive {
|
||||
Directive2::Variable(..) => {}
|
||||
Directive2::Method(function2) => run_function(result, function2),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_function<'input>(
|
||||
result: &mut Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
function: &mut Function2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) {
|
||||
function.body = function.body.take().map(|statements| {
|
||||
statements
|
||||
.into_iter()
|
||||
.filter_map(|statement| match statement {
|
||||
Statement::Variable(var @ ast::Variable {
|
||||
state_space:
|
||||
ast::StateSpace::Global | ast::StateSpace::Const | ast::StateSpace::Shared,
|
||||
..
|
||||
}) => {
|
||||
result.push(Directive2::Variable(ast::LinkingDirective::NONE, var));
|
||||
None
|
||||
}
|
||||
s => Some(s),
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
}
|
404
ptx/src/pass/insert_explicit_load_store.rs
Normal file
404
ptx/src/pass/insert_explicit_load_store.rs
Normal file
@ -0,0 +1,404 @@
|
||||
use super::*;
|
||||
// This pass:
|
||||
// * Turns all .local, .param and .reg in-body variables into .local variables
|
||||
// (if _not_ an input method argument)
|
||||
// * Inserts explicit `ld`/`st` for newly converted .reg variables
|
||||
// * Fixup state space of all existing `ld`/`st` instructions into newly
|
||||
// converted variables
|
||||
// * Turns `.entry` input arguments into param::entry and all related `.param`
|
||||
// loads into `param::entry` loads
|
||||
// * All `.func` input arguments are turned into `.reg` arguments by another
|
||||
// pass, so we do nothing there
|
||||
pub(super) fn run<'a, 'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, directive))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_directive<'a, 'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directive: Directive2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<Directive2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
Ok(match directive {
|
||||
var @ Directive2::Variable(..) => var,
|
||||
Directive2::Method(method) => {
|
||||
let visitor = InsertMemSSAVisitor::new(resolver);
|
||||
Directive2::Method(run_method(visitor, method)?)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'a, 'input>(
|
||||
mut visitor: InsertMemSSAVisitor<'a, 'input>,
|
||||
mut method: Function2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<Function2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
let is_kernel = method.is_kernel;
|
||||
if is_kernel {
|
||||
for arg in method.input_arguments.iter_mut() {
|
||||
let old_name = arg.name;
|
||||
let old_space = arg.state_space;
|
||||
let new_space = ast::StateSpace::ParamEntry;
|
||||
let new_name = visitor
|
||||
.resolver
|
||||
.register_unnamed(Some((arg.v_type.clone(), new_space)));
|
||||
visitor.input_argument(old_name, new_name, old_space)?;
|
||||
arg.name = new_name;
|
||||
arg.state_space = new_space;
|
||||
}
|
||||
};
|
||||
for arg in method.return_arguments.iter_mut() {
|
||||
visitor.visit_variable(arg)?;
|
||||
}
|
||||
let return_arguments = &method.return_arguments[..];
|
||||
let body = method
|
||||
.body
|
||||
.map(move |statements| {
|
||||
let mut result = Vec::with_capacity(statements.len());
|
||||
for statement in statements {
|
||||
run_statement(&mut visitor, return_arguments, &mut result, statement)?;
|
||||
}
|
||||
Ok::<_, TranslateError>(result)
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Function2 { body, ..method })
|
||||
}
|
||||
|
||||
fn run_statement<'a, 'input>(
|
||||
visitor: &mut InsertMemSSAVisitor<'a, 'input>,
|
||||
return_arguments: &[ast::Variable<SpirvWord>],
|
||||
result: &mut Vec<ExpandedStatement>,
|
||||
statement: ExpandedStatement,
|
||||
) -> Result<(), TranslateError> {
|
||||
match statement {
|
||||
Statement::Instruction(ast::Instruction::Ret { data }) => {
|
||||
let statement = if return_arguments.is_empty() {
|
||||
Statement::Instruction(ast::Instruction::Ret { data })
|
||||
} else {
|
||||
Statement::RetValue(
|
||||
data,
|
||||
return_arguments
|
||||
.iter()
|
||||
.map(|arg| {
|
||||
if arg.state_space != ast::StateSpace::Local {
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
Ok((arg.name, arg.v_type.clone()))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?,
|
||||
)
|
||||
};
|
||||
let new_statement = statement.visit_map(visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
result.push(new_statement);
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
Statement::Variable(mut var) => {
|
||||
visitor.visit_variable(&mut var)?;
|
||||
result.push(Statement::Variable(var));
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Ld { data, arguments }) => {
|
||||
let instruction = visitor.visit_ld(data, arguments)?;
|
||||
let instruction = ast::visit_map(instruction, visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
result.push(Statement::Instruction(instruction));
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::St { data, arguments }) => {
|
||||
let instruction = visitor.visit_st(data, arguments)?;
|
||||
let instruction = ast::visit_map(instruction, visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
result.push(Statement::Instruction(instruction));
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
Statement::PtrAccess(ptr_access) => {
|
||||
let statement = Statement::PtrAccess(visitor.visit_ptr_access(ptr_access)?);
|
||||
let statement = statement.visit_map(visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
result.push(statement);
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
s => {
|
||||
let new_statement = s.visit_map(visitor)?;
|
||||
result.extend(visitor.pre.drain(..).map(Statement::Instruction));
|
||||
result.push(new_statement);
|
||||
result.extend(visitor.post.drain(..).map(Statement::Instruction));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct InsertMemSSAVisitor<'a, 'input> {
|
||||
resolver: &'a mut GlobalStringIdentResolver2<'input>,
|
||||
variables: FxHashMap<SpirvWord, RemapAction>,
|
||||
pre: Vec<ast::Instruction<SpirvWord>>,
|
||||
post: Vec<ast::Instruction<SpirvWord>>,
|
||||
}
|
||||
|
||||
impl<'a, 'input> InsertMemSSAVisitor<'a, 'input> {
|
||||
fn new(resolver: &'a mut GlobalStringIdentResolver2<'input>) -> Self {
|
||||
Self {
|
||||
resolver,
|
||||
variables: FxHashMap::default(),
|
||||
pre: Vec::new(),
|
||||
post: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn input_argument(
|
||||
&mut self,
|
||||
old_name: SpirvWord,
|
||||
new_name: SpirvWord,
|
||||
old_space: ast::StateSpace,
|
||||
) -> Result<(), TranslateError> {
|
||||
if old_space != ast::StateSpace::Param {
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
self.variables.insert(
|
||||
old_name,
|
||||
RemapAction::LDStSpaceChange {
|
||||
name: new_name,
|
||||
old_space,
|
||||
new_space: ast::StateSpace::ParamEntry,
|
||||
},
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn variable(
|
||||
&mut self,
|
||||
type_: &ast::Type,
|
||||
old_name: SpirvWord,
|
||||
new_name: SpirvWord,
|
||||
old_space: ast::StateSpace,
|
||||
) -> Result<bool, TranslateError> {
|
||||
Ok(match old_space {
|
||||
ast::StateSpace::Reg => {
|
||||
self.variables.insert(
|
||||
old_name,
|
||||
RemapAction::PreLdPostSt {
|
||||
name: new_name,
|
||||
type_: type_.clone(),
|
||||
},
|
||||
);
|
||||
true
|
||||
}
|
||||
ast::StateSpace::Param => {
|
||||
self.variables.insert(
|
||||
old_name,
|
||||
RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space: ast::StateSpace::Local,
|
||||
name: new_name,
|
||||
},
|
||||
);
|
||||
true
|
||||
}
|
||||
// Good as-is
|
||||
ast::StateSpace::Local
|
||||
| ast::StateSpace::Generic
|
||||
| ast::StateSpace::SharedCluster
|
||||
| ast::StateSpace::Global
|
||||
| ast::StateSpace::Const
|
||||
| ast::StateSpace::SharedCta
|
||||
| ast::StateSpace::Shared
|
||||
| ast::StateSpace::ParamEntry
|
||||
| ast::StateSpace::ParamFunc => return Err(error_unreachable()),
|
||||
})
|
||||
}
|
||||
|
||||
fn visit_st(
|
||||
&self,
|
||||
mut data: ast::StData,
|
||||
mut arguments: ast::StArgs<SpirvWord>,
|
||||
) -> Result<ast::Instruction<SpirvWord>, TranslateError> {
|
||||
if let Some(remap) = self.variables.get(&arguments.src1) {
|
||||
match remap {
|
||||
RemapAction::PreLdPostSt { .. } => {}
|
||||
RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space,
|
||||
name,
|
||||
} => {
|
||||
if data.state_space != *old_space {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
data.state_space = *new_space;
|
||||
arguments.src1 = *name;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ast::Instruction::St { data, arguments })
|
||||
}
|
||||
|
||||
fn visit_ld(
|
||||
&self,
|
||||
mut data: ast::LdDetails,
|
||||
mut arguments: ast::LdArgs<SpirvWord>,
|
||||
) -> Result<ast::Instruction<SpirvWord>, TranslateError> {
|
||||
if let Some(remap) = self.variables.get(&arguments.src) {
|
||||
match remap {
|
||||
RemapAction::PreLdPostSt { .. } => {}
|
||||
RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space,
|
||||
name,
|
||||
} => {
|
||||
if data.state_space != *old_space {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
data.state_space = *new_space;
|
||||
arguments.src = *name;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(ast::Instruction::Ld { data, arguments })
|
||||
}
|
||||
|
||||
fn visit_ptr_access(
|
||||
&mut self,
|
||||
ptr_access: PtrAccess<SpirvWord>,
|
||||
) -> Result<PtrAccess<SpirvWord>, TranslateError> {
|
||||
let (old_space, new_space, name) = match self.variables.get(&ptr_access.ptr_src) {
|
||||
Some(RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space,
|
||||
name,
|
||||
}) => (*old_space, *new_space, *name),
|
||||
Some(RemapAction::PreLdPostSt { .. }) | None => return Ok(ptr_access),
|
||||
};
|
||||
if ptr_access.state_space != old_space {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
// Propagate space changes in dst
|
||||
let new_dst = self
|
||||
.resolver
|
||||
.register_unnamed(Some((ptr_access.underlying_type.clone(), new_space)));
|
||||
self.variables.insert(
|
||||
ptr_access.dst,
|
||||
RemapAction::LDStSpaceChange {
|
||||
old_space,
|
||||
new_space,
|
||||
name: new_dst,
|
||||
},
|
||||
);
|
||||
Ok(PtrAccess {
|
||||
ptr_src: name,
|
||||
dst: new_dst,
|
||||
state_space: new_space,
|
||||
..ptr_access
|
||||
})
|
||||
}
|
||||
|
||||
fn visit_variable(&mut self, var: &mut ast::Variable<SpirvWord>) -> Result<(), TranslateError> {
|
||||
let old_space = match var.state_space {
|
||||
space @ (ptx_parser::StateSpace::Reg | ptx_parser::StateSpace::Param) => space,
|
||||
// Do nothing
|
||||
ptx_parser::StateSpace::Local => return Ok(()),
|
||||
// Handled by another pass
|
||||
ptx_parser::StateSpace::Generic
|
||||
| ptx_parser::StateSpace::SharedCluster
|
||||
| ptx_parser::StateSpace::ParamEntry
|
||||
| ptx_parser::StateSpace::Global
|
||||
| ptx_parser::StateSpace::SharedCta
|
||||
| ptx_parser::StateSpace::Const
|
||||
| ptx_parser::StateSpace::Shared
|
||||
| ptx_parser::StateSpace::ParamFunc => return Ok(()),
|
||||
};
|
||||
let old_name = var.name;
|
||||
let new_space = ast::StateSpace::Local;
|
||||
let new_name = self
|
||||
.resolver
|
||||
.register_unnamed(Some((var.v_type.clone(), new_space)));
|
||||
self.variable(&var.v_type, old_name, new_name, old_space)?;
|
||||
var.name = new_name;
|
||||
var.state_space = new_space;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'input> ast::VisitorMap<SpirvWord, SpirvWord, TranslateError>
|
||||
for InsertMemSSAVisitor<'a, 'input>
|
||||
{
|
||||
fn visit(
|
||||
&mut self,
|
||||
ident: SpirvWord,
|
||||
_type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
is_dst: bool,
|
||||
_relaxed_type_check: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
if let Some(remap) = self.variables.get(&ident) {
|
||||
match remap {
|
||||
RemapAction::PreLdPostSt { name, type_ } => {
|
||||
if is_dst {
|
||||
let temp = self
|
||||
.resolver
|
||||
.register_unnamed(Some((type_.clone(), ast::StateSpace::Reg)));
|
||||
self.post.push(ast::Instruction::St {
|
||||
data: ast::StData {
|
||||
state_space: ast::StateSpace::Local,
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
caching: ast::StCacheOperator::Writethrough,
|
||||
typ: type_.clone(),
|
||||
},
|
||||
arguments: ast::StArgs {
|
||||
src1: *name,
|
||||
src2: temp,
|
||||
},
|
||||
});
|
||||
Ok(temp)
|
||||
} else {
|
||||
let temp = self
|
||||
.resolver
|
||||
.register_unnamed(Some((type_.clone(), ast::StateSpace::Reg)));
|
||||
self.pre.push(ast::Instruction::Ld {
|
||||
data: ast::LdDetails {
|
||||
state_space: ast::StateSpace::Local,
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
caching: ast::LdCacheOperator::Cached,
|
||||
typ: type_.clone(),
|
||||
non_coherent: false,
|
||||
},
|
||||
arguments: ast::LdArgs {
|
||||
dst: temp,
|
||||
src: *name,
|
||||
},
|
||||
});
|
||||
Ok(temp)
|
||||
}
|
||||
}
|
||||
RemapAction::LDStSpaceChange { .. } => {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Ok(ident)
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_ident(
|
||||
&mut self,
|
||||
args: SpirvWord,
|
||||
type_space: Option<(&ast::Type, ast::StateSpace)>,
|
||||
is_dst: bool,
|
||||
relaxed_type_check: bool,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
self.visit(args, type_space, is_dst, relaxed_type_check)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum RemapAction {
|
||||
PreLdPostSt {
|
||||
name: SpirvWord,
|
||||
type_: ast::Type,
|
||||
},
|
||||
LDStSpaceChange {
|
||||
old_space: ast::StateSpace,
|
||||
new_space: ast::StateSpace,
|
||||
name: SpirvWord,
|
||||
},
|
||||
}
|
426
ptx/src/pass/insert_implicit_conversions2.rs
Normal file
426
ptx/src/pass/insert_implicit_conversions2.rs
Normal file
@ -0,0 +1,426 @@
|
||||
use std::mem;
|
||||
|
||||
use super::*;
|
||||
use ptx_parser as ast;
|
||||
|
||||
/*
|
||||
There are several kinds of implicit conversions in PTX:
|
||||
* auto-bitcast: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#type-information-for-instructions-and-operands
|
||||
* special ld/st/cvt conversion rules: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operand-size-exceeding-instruction-type-size
|
||||
- ld.param: not documented, but for instruction `ld.param.<type> x, [y]`,
|
||||
semantics are to first zext/chop/bitcast `y` as needed and then do
|
||||
documented special ld/st/cvt conversion rules for destination operands
|
||||
- st.param [x] y (used as function return arguments) same rule as above applies
|
||||
- generic/global ld: for instruction `ld x, [y]`, y must be of type
|
||||
b64/u64/s64, which is bitcast to a pointer, dereferenced and then
|
||||
documented special ld/st/cvt conversion rules are applied to dst
|
||||
- generic/global st: for instruction `st [x], y`, x must be of type
|
||||
b64/u64/s64, which is bitcast to a pointer
|
||||
*/
|
||||
pub(super) fn run<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, directive))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_directive<'a, 'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directive: Directive2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<Directive2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
Ok(match directive {
|
||||
var @ Directive2::Variable(..) => var,
|
||||
Directive2::Method(mut method) => {
|
||||
method.body = method
|
||||
.body
|
||||
.map(|statements| run_statements(resolver, statements))
|
||||
.transpose()?;
|
||||
Directive2::Method(method)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn run_statements<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
func: Vec<ExpandedStatement>,
|
||||
) -> Result<Vec<ExpandedStatement>, TranslateError> {
|
||||
let mut result = Vec::with_capacity(func.len());
|
||||
for s in func.into_iter() {
|
||||
insert_implicit_conversions_impl(resolver, &mut result, s)?;
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn insert_implicit_conversions_impl<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
func: &mut Vec<ExpandedStatement>,
|
||||
stmt: ExpandedStatement,
|
||||
) -> Result<(), TranslateError> {
|
||||
let mut post_conv = Vec::new();
|
||||
let statement = stmt.visit_map::<SpirvWord, TranslateError>(
|
||||
&mut |operand,
|
||||
type_state: Option<(&ast::Type, ast::StateSpace)>,
|
||||
is_dst,
|
||||
relaxed_type_check| {
|
||||
let (instr_type, instruction_space) = match type_state {
|
||||
None => return Ok(operand),
|
||||
Some(t) => t,
|
||||
};
|
||||
let (operand_type, operand_space) = resolver.get_typed(operand)?;
|
||||
let conversion_fn = if relaxed_type_check {
|
||||
if is_dst {
|
||||
should_convert_relaxed_dst_wrapper
|
||||
} else {
|
||||
should_convert_relaxed_src_wrapper
|
||||
}
|
||||
} else {
|
||||
default_implicit_conversion
|
||||
};
|
||||
match conversion_fn(
|
||||
(*operand_space, &operand_type),
|
||||
(instruction_space, instr_type),
|
||||
)? {
|
||||
Some(conv_kind) => {
|
||||
let conv_output = if is_dst { &mut post_conv } else { &mut *func };
|
||||
let mut from_type = instr_type.clone();
|
||||
let mut from_space = instruction_space;
|
||||
let mut to_type = operand_type.clone();
|
||||
let mut to_space = *operand_space;
|
||||
let mut src =
|
||||
resolver.register_unnamed(Some((instr_type.clone(), instruction_space)));
|
||||
let mut dst = operand;
|
||||
let result = Ok::<_, TranslateError>(src);
|
||||
if !is_dst {
|
||||
mem::swap(&mut src, &mut dst);
|
||||
mem::swap(&mut from_type, &mut to_type);
|
||||
mem::swap(&mut from_space, &mut to_space);
|
||||
}
|
||||
conv_output.push(Statement::Conversion(ImplicitConversion {
|
||||
src,
|
||||
dst,
|
||||
from_type,
|
||||
from_space,
|
||||
to_type,
|
||||
to_space,
|
||||
kind: conv_kind,
|
||||
}));
|
||||
result
|
||||
}
|
||||
None => Ok(operand),
|
||||
}
|
||||
},
|
||||
)?;
|
||||
func.push(statement);
|
||||
func.append(&mut post_conv);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn default_implicit_conversion(
|
||||
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
|
||||
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
|
||||
) -> Result<Option<ConversionKind>, TranslateError> {
|
||||
if instruction_space == ast::StateSpace::Reg {
|
||||
if operand_space == ast::StateSpace::Reg {
|
||||
if let (ast::Type::Vector(vec_len, vec_underlying_type), ast::Type::Scalar(scalar)) =
|
||||
(operand_type, instruction_type)
|
||||
{
|
||||
if scalar.kind() == ast::ScalarKind::Bit
|
||||
&& scalar.size_of() == (vec_underlying_type.size_of() * vec_len)
|
||||
{
|
||||
return Ok(Some(ConversionKind::Default));
|
||||
}
|
||||
}
|
||||
} else if is_addressable(operand_space) {
|
||||
return Ok(Some(ConversionKind::AddressOf));
|
||||
}
|
||||
}
|
||||
if instruction_space != operand_space {
|
||||
default_implicit_conversion_space(
|
||||
(operand_space, operand_type),
|
||||
(instruction_space, instruction_type),
|
||||
)
|
||||
} else if instruction_type != operand_type {
|
||||
default_implicit_conversion_type(instruction_space, operand_type, instruction_type)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tells whether a value living in state space `this` can have its address
/// taken by `default_implicit_conversion`.
///
/// Panics (`todo!()`) for the state spaces that are not handled yet.
fn is_addressable(this: ast::StateSpace) -> bool {
    match this {
        // Not handled yet.
        ast::StateSpace::SharedCluster
        | ast::StateSpace::SharedCta
        | ast::StateSpace::ParamEntry
        | ast::StateSpace::ParamFunc => todo!(),
        ast::StateSpace::Reg | ast::StateSpace::Param => false,
        ast::StateSpace::Const
        | ast::StateSpace::Generic
        | ast::StateSpace::Global
        | ast::StateSpace::Local
        | ast::StateSpace::Shared => true,
    }
}
|
||||
|
||||
// Space is different
|
||||
fn default_implicit_conversion_space(
|
||||
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
|
||||
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
|
||||
) -> Result<Option<ConversionKind>, TranslateError> {
|
||||
if (instruction_space == ast::StateSpace::Generic && coerces_to_generic(operand_space))
|
||||
|| (operand_space == ast::StateSpace::Generic && coerces_to_generic(instruction_space))
|
||||
{
|
||||
Ok(Some(ConversionKind::PtrToPtr))
|
||||
} else if operand_space == ast::StateSpace::Reg {
|
||||
match operand_type {
|
||||
ast::Type::Pointer(operand_ptr_type, operand_ptr_space)
|
||||
if *operand_ptr_space == instruction_space =>
|
||||
{
|
||||
if instruction_type != &ast::Type::Scalar(*operand_ptr_type) {
|
||||
Ok(Some(ConversionKind::PtrToPtr))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
// TODO: 32 bit
|
||||
ast::Type::Scalar(ast::ScalarType::B64)
|
||||
| ast::Type::Scalar(ast::ScalarType::U64)
|
||||
| ast::Type::Scalar(ast::ScalarType::S64) => match instruction_space {
|
||||
ast::StateSpace::Global
|
||||
| ast::StateSpace::Generic
|
||||
| ast::StateSpace::Const
|
||||
| ast::StateSpace::Local
|
||||
| ast::StateSpace::Shared => Ok(Some(ConversionKind::BitToPtr)),
|
||||
_ => Err(error_mismatched_type()),
|
||||
},
|
||||
ast::Type::Scalar(ast::ScalarType::B32)
|
||||
| ast::Type::Scalar(ast::ScalarType::U32)
|
||||
| ast::Type::Scalar(ast::ScalarType::S32) => match instruction_space {
|
||||
ast::StateSpace::Const | ast::StateSpace::Local | ast::StateSpace::Shared => {
|
||||
Ok(Some(ConversionKind::BitToPtr))
|
||||
}
|
||||
_ => Err(error_mismatched_type()),
|
||||
},
|
||||
_ => Err(error_mismatched_type()),
|
||||
}
|
||||
} else if instruction_space == ast::StateSpace::Reg {
|
||||
match instruction_type {
|
||||
ast::Type::Pointer(instruction_ptr_type, instruction_ptr_space)
|
||||
if operand_space == *instruction_ptr_space =>
|
||||
{
|
||||
if operand_type != &ast::Type::Scalar(*instruction_ptr_type) {
|
||||
Ok(Some(ConversionKind::PtrToPtr))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
_ => Err(error_mismatched_type()),
|
||||
}
|
||||
} else {
|
||||
Err(error_mismatched_type())
|
||||
}
|
||||
}
|
||||
|
||||
// Space is same, but type is different
|
||||
fn default_implicit_conversion_type(
|
||||
space: ast::StateSpace,
|
||||
operand_type: &ast::Type,
|
||||
instruction_type: &ast::Type,
|
||||
) -> Result<Option<ConversionKind>, TranslateError> {
|
||||
if space == ast::StateSpace::Reg {
|
||||
if should_bitcast(instruction_type, operand_type) {
|
||||
Ok(Some(ConversionKind::Default))
|
||||
} else {
|
||||
Err(TranslateError::MismatchedType)
|
||||
}
|
||||
} else {
|
||||
Ok(Some(ConversionKind::PtrToPtr))
|
||||
}
|
||||
}
|
||||
|
||||
/// Tells whether a pointer into state space `this` may be implicitly
/// converted to or from a generic pointer (see
/// `default_implicit_conversion_space`).
fn coerces_to_generic(this: ast::StateSpace) -> bool {
    match this {
        ast::StateSpace::Generic
        | ast::StateSpace::Reg
        | ast::StateSpace::Param
        | ast::StateSpace::ParamEntry
        | ast::StateSpace::ParamFunc => false,
        ast::StateSpace::Global
        | ast::StateSpace::Const
        | ast::StateSpace::Local
        | ast::StateSpace::Shared
        | ast::StateSpace::SharedCta
        | ast::StateSpace::SharedCluster => true,
    }
}
|
||||
|
||||
/// Decides whether a register operand may be reinterpreted (bitcast) as the
/// instruction type.
///
/// Both types must be scalars, both vectors or both arrays; for vectors and
/// arrays only the element types are compared. The element sizes must match,
/// and then the instruction kind decides:
/// * bit: any operand kind except bit;
/// * float: bit operand only;
/// * signed: bit or unsigned operand;
/// * unsigned: bit or signed operand;
/// * predicate: never.
fn should_bitcast(instr: &ast::Type, operand: &ast::Type) -> bool {
    let (instr_scalar, operand_scalar) = match (instr, operand) {
        (ast::Type::Scalar(inst), ast::Type::Scalar(operand))
        | (ast::Type::Vector(_, inst), ast::Type::Vector(_, operand))
        | (ast::Type::Array(_, inst, _), ast::Type::Array(_, operand, _)) => (*inst, *operand),
        _ => return false,
    };
    if instr_scalar.size_of() != operand_scalar.size_of() {
        return false;
    }
    let operand_kind = operand_scalar.kind();
    match instr_scalar.kind() {
        ast::ScalarKind::Pred => false,
        ast::ScalarKind::Bit => !matches!(operand_kind, ast::ScalarKind::Bit),
        ast::ScalarKind::Float => matches!(operand_kind, ast::ScalarKind::Bit),
        ast::ScalarKind::Signed => {
            matches!(operand_kind, ast::ScalarKind::Bit | ast::ScalarKind::Unsigned)
        }
        ast::ScalarKind::Unsigned => {
            matches!(operand_kind, ast::ScalarKind::Bit | ast::ScalarKind::Signed)
        }
    }
}
|
||||
|
||||
pub(crate) fn should_convert_relaxed_dst_wrapper(
|
||||
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
|
||||
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
|
||||
) -> Result<Option<ConversionKind>, TranslateError> {
|
||||
if operand_space != instruction_space {
|
||||
return Err(TranslateError::MismatchedType);
|
||||
}
|
||||
if operand_type == instruction_type {
|
||||
return Ok(None);
|
||||
}
|
||||
match should_convert_relaxed_dst(operand_type, instruction_type) {
|
||||
conv @ Some(_) => Ok(conv),
|
||||
None => Err(TranslateError::MismatchedType),
|
||||
}
|
||||
}
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operand-size-exceeding-instruction-type-size__relaxed-type-checking-rules-destination-operands
|
||||
fn should_convert_relaxed_dst(
|
||||
dst_type: &ast::Type,
|
||||
instr_type: &ast::Type,
|
||||
) -> Option<ConversionKind> {
|
||||
if dst_type == instr_type {
|
||||
return None;
|
||||
}
|
||||
match (dst_type, instr_type) {
|
||||
(ast::Type::Scalar(dst_type), ast::Type::Scalar(instr_type)) => match instr_type.kind() {
|
||||
ast::ScalarKind::Bit => {
|
||||
if instr_type.size_of() <= dst_type.size_of() {
|
||||
Some(ConversionKind::Default)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Signed => {
|
||||
if dst_type.kind() != ast::ScalarKind::Float {
|
||||
if instr_type.size_of() == dst_type.size_of() {
|
||||
Some(ConversionKind::Default)
|
||||
} else if instr_type.size_of() < dst_type.size_of() {
|
||||
Some(ConversionKind::SignExtend)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Unsigned => {
|
||||
if instr_type.size_of() <= dst_type.size_of()
|
||||
&& dst_type.kind() != ast::ScalarKind::Float
|
||||
{
|
||||
Some(ConversionKind::Default)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Float => {
|
||||
if instr_type.size_of() <= dst_type.size_of()
|
||||
&& dst_type.kind() == ast::ScalarKind::Bit
|
||||
{
|
||||
Some(ConversionKind::Default)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Pred => None,
|
||||
},
|
||||
(ast::Type::Vector(_, dst_type), ast::Type::Vector(_, instr_type))
|
||||
| (ast::Type::Array(_, dst_type, _), ast::Type::Array(_, instr_type, _)) => {
|
||||
should_convert_relaxed_dst(
|
||||
&ast::Type::Scalar(*dst_type),
|
||||
&ast::Type::Scalar(*instr_type),
|
||||
)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn should_convert_relaxed_src_wrapper(
|
||||
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
|
||||
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
|
||||
) -> Result<Option<ConversionKind>, TranslateError> {
|
||||
if operand_space != instruction_space {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
if operand_type == instruction_type {
|
||||
return Ok(None);
|
||||
}
|
||||
match should_convert_relaxed_src(operand_type, instruction_type) {
|
||||
conv @ Some(_) => Ok(conv),
|
||||
None => Err(error_mismatched_type()),
|
||||
}
|
||||
}
|
||||
|
||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operand-size-exceeding-instruction-type-size__relaxed-type-checking-rules-source-operands
|
||||
fn should_convert_relaxed_src(
|
||||
src_type: &ast::Type,
|
||||
instr_type: &ast::Type,
|
||||
) -> Option<ConversionKind> {
|
||||
if src_type == instr_type {
|
||||
return None;
|
||||
}
|
||||
match (src_type, instr_type) {
|
||||
(ast::Type::Scalar(src_type), ast::Type::Scalar(instr_type)) => match instr_type.kind() {
|
||||
ast::ScalarKind::Bit => {
|
||||
if instr_type.size_of() <= src_type.size_of() {
|
||||
Some(ConversionKind::Default)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Signed | ast::ScalarKind::Unsigned => {
|
||||
if instr_type.size_of() <= src_type.size_of()
|
||||
&& src_type.kind() != ast::ScalarKind::Float
|
||||
{
|
||||
Some(ConversionKind::Default)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Float => {
|
||||
if instr_type.size_of() <= src_type.size_of()
|
||||
&& src_type.kind() == ast::ScalarKind::Bit
|
||||
{
|
||||
Some(ConversionKind::Default)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
ast::ScalarKind::Pred => None,
|
||||
},
|
||||
(ast::Type::Vector(_, dst_type), ast::Type::Vector(_, instr_type))
|
||||
| (ast::Type::Array(_, dst_type, _), ast::Type::Array(_, instr_type, _)) => {
|
||||
should_convert_relaxed_src(
|
||||
&ast::Type::Scalar(*dst_type),
|
||||
&ast::Type::Scalar(*instr_type),
|
||||
)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
@ -0,0 +1,29 @@
|
||||
.version 6.5
|
||||
.target sm_50
|
||||
.address_size 64
|
||||
|
||||
.func use_modes();
|
||||
|
||||
// Test fixture entry point: an f32 add with `.rz.ftz`, a call to `use_modes`,
// then an f32 add with `.rp.ftz`. The statement sequence is significant.
.visible .entry kernel()
|
||||
{
|
||||
.reg .f32 temp;
|
||||
|
||||
add.rz.ftz.f32 temp, temp, temp;
|
||||
call use_modes;
|
||||
add.rp.ftz.f32 temp, temp, temp;
|
||||
ret;
|
||||
}
|
||||
|
||||
// Test fixture callee: branches on `pred` to one of two labels, each doing a
// single f32 add with a different rounding modifier (`.rm` or `.rz`) and
// returning. `pred` is never assigned in this function.
.func use_modes()
|
||||
{
|
||||
.reg .f32 temp;
|
||||
.reg .pred pred;
|
||||
@pred bra SET_RM;
|
||||
@!pred bra SET_RZ;
|
||||
SET_RM:
|
||||
add.rm.f32 temp, temp, temp;
|
||||
ret;
|
||||
SET_RZ:
|
||||
add.rz.f32 temp, temp, temp;
|
||||
ret;
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
.version 6.5
|
||||
.target sm_30
|
||||
.address_size 64
|
||||
|
||||
// Test fixture entry point: two adds with `.ftz` (f16 then f32) followed by
// two adds without it (f16 then f32). The statement sequence is significant.
.visible .entry add()
|
||||
{
|
||||
.reg .f32 temp<3>;
|
||||
|
||||
add.ftz.f16 temp2, temp1, temp0;
|
||||
add.ftz.f32 temp2, temp1, temp0;
|
||||
|
||||
add.f16 temp2, temp1, temp0;
|
||||
add.f32 temp2, temp1, temp0;
|
||||
ret;
|
||||
}
|
1907
ptx/src/pass/instruction_mode_to_global_mode/mod.rs
Normal file
1907
ptx/src/pass/instruction_mode_to_global_mode/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
399
ptx/src/pass/instruction_mode_to_global_mode/test.rs
Normal file
399
ptx/src/pass/instruction_mode_to_global_mode/test.rs
Normal file
@ -0,0 +1,399 @@
|
||||
use super::*;
|
||||
use int_enum::IntEnum;
|
||||
use strum::EnumCount;
|
||||
|
||||
#[repr(usize)]
|
||||
#[derive(IntEnum, Eq, PartialEq, Copy, Clone, Debug)]
|
||||
enum Bool {
|
||||
False = 0,
|
||||
True = 1,
|
||||
}
|
||||
|
||||
fn ftz() -> InstructionModes {
|
||||
InstructionModes {
|
||||
denormal_f32: Some(DenormalMode::FlushToZero),
|
||||
denormal_f16f64: None,
|
||||
rounding_f32: None,
|
||||
rounding_f16f64: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn preserve() -> InstructionModes {
|
||||
InstructionModes {
|
||||
denormal_f32: Some(DenormalMode::Preserve),
|
||||
denormal_f16f64: None,
|
||||
rounding_f32: None,
|
||||
rounding_f16f64: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Chain `entry -> ftz -> (no mode) -> ftz`: only the first ftz block is a
/// candidate for a mode insertion, and the optimizer is expected to move the
/// mode onto the kernel itself.
#[test]
fn transitive_mixed() {
    let entry_id = SpirvWord(1);
    let first_ftz_id = SpirvWord(2);
    let empty_id = SpirvWord(3);
    let second_ftz_id = SpirvWord(4);

    let mut graph = ControlFlowGraph::new();
    let entry_bb = graph.add_entry_basic_block(entry_id);
    graph.add_jump(entry_bb, first_ftz_id);
    let first_ftz_bb = graph.get_or_add_basic_block(first_ftz_id);
    graph.set_modes(first_ftz_bb, ftz(), ftz());
    graph.add_jump(first_ftz_bb, empty_id);
    let empty_bb = graph.get_or_add_basic_block(empty_id);
    graph.add_jump(empty_bb, second_ftz_id);
    let second_ftz_bb = graph.get_or_add_basic_block(second_ftz_id);
    graph.set_modes(second_ftz_bb, ftz(), ftz());

    let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
    assert_eq!(partial_result.bb_must_insert_mode.len(), 0);
    assert_eq!(partial_result.bb_maybe_insert_mode.len(), 1);
    assert_eq!(
        partial_result.bb_maybe_insert_mode[&first_ftz_id],
        (DenormalMode::FlushToZero, iter::once(entry_id).collect())
    );

    let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
    assert_eq!(result.basic_blocks.len(), 0);
    assert_eq!(result.kernels.len(), 1);
    assert_eq!(result.kernels[&entry_id], DenormalMode::FlushToZero);
}
|
||||
|
||||
/// Chain `entry -> ftz -> (no mode) -> preserve`: the preserve block must get
/// an explicit mode switch, while the ftz mode is expected to end up on the
/// kernel.
#[test]
fn transitive_change_twice() {
    let entry_id = SpirvWord(1);
    let ftz_id = SpirvWord(2);
    let empty_id = SpirvWord(3);
    let preserve_id = SpirvWord(4);

    let mut graph = ControlFlowGraph::new();
    let entry_bb = graph.add_entry_basic_block(entry_id);
    graph.add_jump(entry_bb, ftz_id);
    let ftz_bb = graph.get_or_add_basic_block(ftz_id);
    graph.set_modes(ftz_bb, ftz(), ftz());
    graph.add_jump(ftz_bb, empty_id);
    let empty_bb = graph.get_or_add_basic_block(empty_id);
    graph.add_jump(empty_bb, preserve_id);
    let preserve_bb = graph.get_or_add_basic_block(preserve_id);
    graph.set_modes(preserve_bb, preserve(), preserve());

    let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
    assert_eq!(partial_result.bb_must_insert_mode.len(), 1);
    assert!(partial_result.bb_must_insert_mode.contains(&preserve_id));
    assert_eq!(partial_result.bb_maybe_insert_mode.len(), 1);
    assert_eq!(
        partial_result.bb_maybe_insert_mode[&ftz_id],
        (DenormalMode::FlushToZero, iter::once(entry_id).collect())
    );

    let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
    assert_eq!(result.basic_blocks, iter::once(preserve_id).collect());
    assert_eq!(result.kernels.len(), 1);
    assert_eq!(result.kernels[&entry_id], DenormalMode::FlushToZero);
}
|
||||
|
||||
/// Chain `entry -> (no mode) -> preserve`: the only mode in the graph is
/// expected to be hoisted onto the kernel, leaving no per-block insertions.
#[test]
fn transitive_change() {
    let entry_id = SpirvWord(1);
    let empty_id = SpirvWord(2);
    let preserve_id = SpirvWord(3);

    let mut graph = ControlFlowGraph::new();
    let entry_bb = graph.add_entry_basic_block(entry_id);
    graph.add_jump(entry_bb, empty_id);
    let empty_bb = graph.get_or_add_basic_block(empty_id);
    graph.add_jump(empty_bb, preserve_id);
    let preserve_bb = graph.get_or_add_basic_block(preserve_id);
    graph.set_modes(preserve_bb, preserve(), preserve());

    let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
    assert_eq!(partial_result.bb_must_insert_mode.len(), 0);
    assert_eq!(partial_result.bb_maybe_insert_mode.len(), 1);
    assert_eq!(
        partial_result.bb_maybe_insert_mode[&preserve_id],
        (DenormalMode::Preserve, iter::once(entry_id).collect())
    );

    let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
    assert_eq!(result.basic_blocks.len(), 0);
    assert_eq!(result.kernels.len(), 1);
    assert_eq!(result.kernels[&entry_id], DenormalMode::Preserve);
}
|
||||
|
||||
/// Diamond with a cycle: the entry jumps to two ftz blocks, each followed by
/// a mode-less block; both mode-less blocks jump to a shared mode-less block
/// that jumps back to both of them. Only the two ftz blocks are candidates
/// for an insertion, and the mode is expected to end up on the kernel.
#[test]
fn codependency() {
    let entry_id = SpirvWord(1);
    let left_f_id = SpirvWord(2);
    let right_f_id = SpirvWord(3);
    let left_none_id = SpirvWord(4);
    let mid_none_id = SpirvWord(5);
    let right_none_id = SpirvWord(6);

    let mut graph = ControlFlowGraph::new();
    let entry_bb = graph.add_entry_basic_block(entry_id);
    graph.add_jump(entry_bb, left_f_id);
    graph.add_jump(entry_bb, right_f_id);
    let left_f_bb = graph.get_or_add_basic_block(left_f_id);
    graph.set_modes(left_f_bb, ftz(), ftz());
    let right_f_bb = graph.get_or_add_basic_block(right_f_id);
    graph.set_modes(right_f_bb, ftz(), ftz());
    graph.add_jump(left_f_bb, left_none_id);
    let left_none_bb = graph.get_or_add_basic_block(left_none_id);
    graph.add_jump(right_f_bb, right_none_id);
    let right_none_bb = graph.get_or_add_basic_block(right_none_id);
    graph.add_jump(left_none_bb, mid_none_id);
    graph.add_jump(right_none_bb, mid_none_id);
    let mid_none_bb = graph.get_or_add_basic_block(mid_none_id);
    graph.add_jump(mid_none_bb, left_none_id);
    graph.add_jump(mid_none_bb, right_none_id);
    // Debugging aid: dump the graph in DOT format.
    //println!(
    //    "{:?}",
    //    petgraph::dot::Dot::with_config(&graph.graph, &[petgraph::dot::Config::EdgeNoLabel])
    //);

    let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
    assert_eq!(partial_result.bb_must_insert_mode.len(), 0);
    assert_eq!(partial_result.bb_maybe_insert_mode.len(), 2);
    for ftz_block in [&left_f_id, &right_f_id] {
        assert_eq!(
            partial_result.bb_maybe_insert_mode[ftz_block],
            (DenormalMode::FlushToZero, iter::once(entry_id).collect())
        );
    }

    let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
    assert_eq!(result.basic_blocks.len(), 0);
    assert_eq!(result.kernels.len(), 1);
    assert_eq!(result.kernels[&entry_id], DenormalMode::FlushToZero);
}
|
||||
|
||||
static FOLD_DENORMAL_PTX: &'static str = include_str!("fold_denormal.ptx");
|
||||
|
||||
#[test]
|
||||
fn fold_denormal() {
|
||||
let method = compile_methods(FOLD_DENORMAL_PTX).pop().unwrap();
|
||||
assert_eq!(true, method.flush_to_zero_f32);
|
||||
assert_eq!(true, method.flush_to_zero_f16f64);
|
||||
let method_body = method.body.unwrap();
|
||||
assert!(matches!(
|
||||
&*method_body,
|
||||
[
|
||||
Statement::Label(..),
|
||||
Statement::Variable(..),
|
||||
Statement::Variable(..),
|
||||
Statement::Variable(..),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::SetMode(ModeRegister::Denormal {
|
||||
f32: false,
|
||||
f16f64: false
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Ret { .. }),
|
||||
]
|
||||
));
|
||||
}
|
||||
|
||||
fn compile_methods(ptx: &str) -> Vec<Function2<ast::Instruction<SpirvWord>, SpirvWord>> {
|
||||
use crate::pass::*;
|
||||
|
||||
let module = ptx_parser::parse_module_checked(ptx).unwrap();
|
||||
let mut flat_resolver = GlobalStringIdentResolver2::new(SpirvWord(1));
|
||||
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, module.directives).unwrap();
|
||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives).unwrap();
|
||||
let directives = expand_operands::run(&mut flat_resolver, directives).unwrap();
|
||||
let directives = normalize_basic_blocks::run(&mut flat_resolver, directives).unwrap();
|
||||
let directives = super::run(&mut flat_resolver, directives).unwrap();
|
||||
directives
|
||||
.into_iter()
|
||||
.filter_map(|s| match s {
|
||||
Directive2::Method(m) => Some(m),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
static CALL_WITH_MODE_PTX: &'static str = include_str!("call_with_mode.ptx");
|
||||
|
||||
#[test]
|
||||
fn call_with_mode() {
|
||||
let methods = compile_methods(CALL_WITH_MODE_PTX);
|
||||
|
||||
assert!(matches!(methods[0].body, None));
|
||||
|
||||
let method_1 = methods[1].body.as_ref().unwrap();
|
||||
assert!(matches!(
|
||||
&**method_1,
|
||||
[
|
||||
Statement::Label(..),
|
||||
Statement::Variable(..),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Call { .. }),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
// Dual prelude
|
||||
Statement::SetMode(ModeRegister::Denormal {
|
||||
f32: true,
|
||||
f16f64: true
|
||||
}),
|
||||
Statement::SetMode(ModeRegister::Rounding {
|
||||
f32: ast::RoundingMode::PositiveInf,
|
||||
f16f64: ast::RoundingMode::NearestEven
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
// Denormal prelude
|
||||
Statement::Label(..),
|
||||
Statement::SetMode(ModeRegister::Denormal {
|
||||
f32: true,
|
||||
f16f64: true
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
// Rounding prelude
|
||||
Statement::Label(..),
|
||||
Statement::SetMode(ModeRegister::Rounding {
|
||||
f32: ast::RoundingMode::PositiveInf,
|
||||
f16f64: ast::RoundingMode::NearestEven
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Ret { .. }),
|
||||
]
|
||||
));
|
||||
let [to_fn0] = calls(method_1);
|
||||
let [_, dual_prelude, _, _, add] = labels(method_1);
|
||||
let [post_call, post_prelude_dual, post_prelude_denormal, post_prelude_rounding] =
|
||||
branches(method_1);
|
||||
assert_eq!(methods[0].name, to_fn0);
|
||||
assert_eq!(post_call, dual_prelude);
|
||||
assert_eq!(post_prelude_dual, add);
|
||||
assert_eq!(post_prelude_denormal, add);
|
||||
assert_eq!(post_prelude_rounding, add);
|
||||
|
||||
let method_2 = methods[2].body.as_ref().unwrap();
|
||||
assert!(matches!(
|
||||
&**method_2,
|
||||
[
|
||||
Statement::Label(..),
|
||||
Statement::Variable(..),
|
||||
Statement::Variable(..),
|
||||
Statement::Conditional(..),
|
||||
Statement::Label(..),
|
||||
Statement::Conditional(..),
|
||||
Statement::Label(..),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
// Dual prelude
|
||||
Statement::SetMode(ModeRegister::Denormal {
|
||||
f32: false,
|
||||
f16f64: true
|
||||
}),
|
||||
Statement::SetMode(ModeRegister::Rounding {
|
||||
f32: ast::RoundingMode::NegativeInf,
|
||||
f16f64: ast::RoundingMode::NearestEven
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
// Denormal prelude
|
||||
Statement::Label(..),
|
||||
Statement::SetMode(ModeRegister::Denormal {
|
||||
f32: false,
|
||||
f16f64: true
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
// Rounding prelude
|
||||
Statement::Label(..),
|
||||
Statement::SetMode(ModeRegister::Rounding {
|
||||
f32: ast::RoundingMode::NegativeInf,
|
||||
f16f64: ast::RoundingMode::NearestEven
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
Statement::SetMode(ModeRegister::Denormal {
|
||||
f32: false,
|
||||
f16f64: true
|
||||
}),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
Statement::Instruction(ast::Instruction::Add { .. }),
|
||||
Statement::Instruction(ast::Instruction::Bra { .. }),
|
||||
Statement::Label(..),
|
||||
Statement::Instruction(ast::Instruction::Ret { .. }),
|
||||
]
|
||||
));
|
||||
let [(if_rm_true, if_rm_false), (if_rz_true, if_rz_false)] = conditionals(method_2);
|
||||
let [_, conditional2, post_conditional2, prelude_dual, _, _, add1, add2_set_denormal, add2, ret] =
|
||||
labels(method_2);
|
||||
let [post_conditional2_jump, post_prelude_dual, post_prelude_denormal, post_prelude_rounding, post_add1, post_add2_set_denormal, post_add2] =
|
||||
branches(method_2);
|
||||
assert_eq!(if_rm_true, prelude_dual);
|
||||
assert_eq!(if_rm_false, conditional2);
|
||||
assert_eq!(if_rz_true, post_conditional2);
|
||||
assert_eq!(if_rz_false, add2_set_denormal);
|
||||
assert_eq!(post_conditional2_jump, prelude_dual);
|
||||
assert_eq!(post_prelude_dual, add1);
|
||||
assert_eq!(post_prelude_denormal, add1);
|
||||
assert_eq!(post_prelude_rounding, add1);
|
||||
assert_eq!(post_add1, ret);
|
||||
assert_eq!(post_add2_set_denormal, add2);
|
||||
assert_eq!(post_add2, ret);
|
||||
}
|
||||
|
||||
fn branches<const N: usize>(
|
||||
fn_: &Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> [SpirvWord; N] {
|
||||
fn_.iter()
|
||||
.filter_map(|s| match s {
|
||||
Statement::Instruction(ast::Instruction::Bra {
|
||||
arguments: ast::BraArgs { src },
|
||||
}) => Some(*src),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn labels<const N: usize>(
|
||||
fn_: &Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> [SpirvWord; N] {
|
||||
fn_.iter()
|
||||
.filter_map(
|
||||
|s: &Statement<ptx_parser::Instruction<SpirvWord>, SpirvWord>| match s {
|
||||
Statement::Label(label) => Some(*label),
|
||||
_ => None,
|
||||
},
|
||||
)
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn calls<const N: usize>(
|
||||
fn_: &Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> [SpirvWord; N] {
|
||||
fn_.iter()
|
||||
.filter_map(|s| match s {
|
||||
Statement::Instruction(ast::Instruction::Call {
|
||||
arguments: ast::CallArgs { func, .. },
|
||||
..
|
||||
}) => Some(*func),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn conditionals<const N: usize>(
|
||||
fn_: &Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> [(SpirvWord, SpirvWord); N] {
|
||||
fn_.iter()
|
||||
.filter_map(|s| match s {
|
||||
Statement::Conditional(BrachCondition {
|
||||
if_true, if_false, ..
|
||||
}) => Some((*if_true, *if_false)),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
}
|
914
ptx/src/pass/mod.rs
Normal file
914
ptx/src/pass/mod.rs
Normal file
@ -0,0 +1,914 @@
|
||||
use ptx_parser as ast;
|
||||
use quick_error::quick_error;
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::hash::Hash;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{hash_map, HashMap},
|
||||
ffi::CString,
|
||||
iter,
|
||||
};
|
||||
use strum::IntoEnumIterator;
|
||||
use strum_macros::EnumIter;
|
||||
|
||||
mod deparamize_functions;
|
||||
pub(crate) mod emit_llvm;
|
||||
mod expand_operands;
|
||||
mod fix_special_registers2;
|
||||
mod hoist_globals;
|
||||
mod insert_explicit_load_store;
|
||||
mod instruction_mode_to_global_mode;
|
||||
mod insert_implicit_conversions2;
|
||||
mod normalize_basic_blocks;
|
||||
mod normalize_identifiers2;
|
||||
mod normalize_predicates2;
|
||||
mod remove_unreachable_basic_blocks;
|
||||
mod replace_instructions_with_function_calls;
|
||||
mod replace_known_functions;
|
||||
mod resolve_function_pointers;
|
||||
|
||||
static ZLUDA_PTX_IMPL: &'static [u8] = include_bytes!("../../lib/zluda_ptx_impl.bc");
|
||||
const ZLUDA_PTX_PREFIX: &'static str = "__zluda_ptx_impl_";
|
||||
|
||||
// Error type shared by all translation passes. Every variant is field-less.
quick_error! {
    #[derive(Debug)]
    pub enum TranslateError {
        UnknownSymbol {}
        UntypedSymbol {}
        // Returned by the implicit conversion pass when an operand's type or
        // state space cannot be reconciled with what the instruction expects.
        MismatchedType {}
        Unreachable {}
        Todo {}
    }
}
|
||||
|
||||
pub fn to_llvm_module<'input>(ast: ast::Module<'input>) -> Result<Module, TranslateError> {
|
||||
let mut flat_resolver = GlobalStringIdentResolver2::<'input>::new(SpirvWord(1));
|
||||
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
|
||||
let sreg_map = SpecialRegistersMap2::new(&mut scoped_resolver)?;
|
||||
let directives = normalize_identifiers2::run(&mut scoped_resolver, ast.directives)?;
|
||||
let directives = replace_known_functions::run(&mut flat_resolver, directives);
|
||||
let directives = normalize_predicates2::run(&mut flat_resolver, directives)?;
|
||||
let directives = resolve_function_pointers::run(directives)?;
|
||||
let directives = fix_special_registers2::run(&mut flat_resolver, &sreg_map, directives)?;
|
||||
let directives = expand_operands::run(&mut flat_resolver, directives)?;
|
||||
let directives = deparamize_functions::run(&mut flat_resolver, directives)?;
|
||||
let directives = normalize_basic_blocks::run(&mut flat_resolver, directives)?;
|
||||
let directives = remove_unreachable_basic_blocks::run(directives)?;
|
||||
let directives = instruction_mode_to_global_mode::run(&mut flat_resolver, directives)?;
|
||||
let directives = insert_explicit_load_store::run(&mut flat_resolver, directives)?;
|
||||
let directives = insert_implicit_conversions2::run(&mut flat_resolver, directives)?;
|
||||
let directives = replace_instructions_with_function_calls::run(&mut flat_resolver, directives)?;
|
||||
let directives = hoist_globals::run(directives)?;
|
||||
let llvm_ir = emit_llvm::run(flat_resolver, directives)?;
|
||||
Ok(Module {
|
||||
llvm_ir,
|
||||
kernel_info: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
pub struct Module {
|
||||
pub llvm_ir: emit_llvm::Module,
|
||||
pub kernel_info: HashMap<String, KernelInfo>,
|
||||
}
|
||||
|
||||
impl Module {
|
||||
pub fn linked_bitcode(&self) -> &[u8] {
|
||||
ZLUDA_PTX_IMPL
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-kernel metadata stored in `Module::kernel_info`.
pub struct KernelInfo {
    // NOTE(review): presumably one (size, flag) entry per kernel argument;
    // the meaning of the `bool` is not visible in this file - confirm
    pub arguments_sizes: Vec<(usize, bool)>,
    pub uses_shared_mem: bool,
}
|
||||
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq, Hash, Copy, Clone, EnumIter)]
|
||||
enum PtxSpecialRegister {
|
||||
Tid,
|
||||
Ntid,
|
||||
Ctaid,
|
||||
Nctaid,
|
||||
Clock,
|
||||
LanemaskLt,
|
||||
}
|
||||
|
||||
impl PtxSpecialRegister {
|
||||
fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Tid => "%tid",
|
||||
Self::Ntid => "%ntid",
|
||||
Self::Ctaid => "%ctaid",
|
||||
Self::Nctaid => "%nctaid",
|
||||
Self::Clock => "%clock",
|
||||
Self::LanemaskLt => "%lanemask_lt",
|
||||
}
|
||||
}
|
||||
|
||||
fn get_type(self) -> ast::Type {
|
||||
match self {
|
||||
PtxSpecialRegister::Tid
|
||||
| PtxSpecialRegister::Ntid
|
||||
| PtxSpecialRegister::Ctaid
|
||||
| PtxSpecialRegister::Nctaid => ast::Type::Vector(4, self.get_function_return_type()),
|
||||
_ => ast::Type::Scalar(self.get_function_return_type()),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_function_return_type(self) -> ast::ScalarType {
|
||||
match self {
|
||||
PtxSpecialRegister::Tid => ast::ScalarType::U32,
|
||||
PtxSpecialRegister::Ntid => ast::ScalarType::U32,
|
||||
PtxSpecialRegister::Ctaid => ast::ScalarType::U32,
|
||||
PtxSpecialRegister::Nctaid => ast::ScalarType::U32,
|
||||
PtxSpecialRegister::Clock => ast::ScalarType::U32,
|
||||
PtxSpecialRegister::LanemaskLt => ast::ScalarType::U32,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_function_input_type(self) -> Option<ast::ScalarType> {
|
||||
match self {
|
||||
PtxSpecialRegister::Tid
|
||||
| PtxSpecialRegister::Ntid
|
||||
| PtxSpecialRegister::Ctaid
|
||||
| PtxSpecialRegister::Nctaid => Some(ast::ScalarType::U8),
|
||||
PtxSpecialRegister::Clock | PtxSpecialRegister::LanemaskLt => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_unprefixed_function_name(self) -> &'static str {
|
||||
match self {
|
||||
PtxSpecialRegister::Tid => "sreg_tid",
|
||||
PtxSpecialRegister::Ntid => "sreg_ntid",
|
||||
PtxSpecialRegister::Ctaid => "sreg_ctaid",
|
||||
PtxSpecialRegister::Nctaid => "sreg_nctaid",
|
||||
PtxSpecialRegister::Clock => "sreg_clock",
|
||||
PtxSpecialRegister::LanemaskLt => "sreg_lanemask_lt",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
fn error_unreachable() -> TranslateError {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
fn error_unreachable() -> TranslateError {
|
||||
TranslateError::Unreachable
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
fn error_todo() -> TranslateError {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
fn error_todo() -> TranslateError {
|
||||
TranslateError::Todo
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
fn error_unknown_symbol() -> TranslateError {
|
||||
panic!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
fn error_unknown_symbol() -> TranslateError {
|
||||
TranslateError::UnknownSymbol
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
fn error_mismatched_type() -> TranslateError {
|
||||
panic!()
|
||||
}
|
||||
|
||||
#[cfg(not(debug_assertions))]
|
||||
fn error_mismatched_type() -> TranslateError {
|
||||
TranslateError::MismatchedType
|
||||
}
|
||||
|
||||
enum Statement<I, P: ast::Operand> {
|
||||
Label(SpirvWord),
|
||||
Variable(ast::Variable<P::Ident>),
|
||||
Instruction(I),
|
||||
// SPIR-V compatible replacement for PTX predicates
|
||||
Conditional(BrachCondition),
|
||||
Conversion(ImplicitConversion),
|
||||
Constant(ConstantDefinition),
|
||||
RetValue(ast::RetData, Vec<(SpirvWord, ast::Type)>),
|
||||
PtrAccess(PtrAccess<P>),
|
||||
RepackVector(RepackVectorDetails),
|
||||
FunctionPointer(FunctionPointerDetails),
|
||||
VectorRead(VectorRead),
|
||||
VectorWrite(VectorWrite),
|
||||
SetMode(ModeRegister),
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq, Clone, Copy)]
|
||||
#[cfg_attr(test, derive(Debug))]
|
||||
enum ModeRegister {
|
||||
Denormal {
|
||||
f32: bool,
|
||||
f16f64: bool,
|
||||
},
|
||||
Rounding {
|
||||
f32: ast::RoundingMode,
|
||||
f16f64: ast::RoundingMode,
|
||||
},
|
||||
}
|
||||
|
||||
impl<T: ast::Operand<Ident = SpirvWord>> Statement<ast::Instruction<T>, T> {
|
||||
fn visit_map<To: ast::Operand<Ident = SpirvWord>, Err>(
|
||||
self,
|
||||
visitor: &mut impl ast::VisitorMap<T, To, Err>,
|
||||
) -> std::result::Result<Statement<ast::Instruction<To>, To>, Err> {
|
||||
Ok(match self {
|
||||
Statement::Instruction(i) => {
|
||||
return ast::visit_map(i, visitor).map(Statement::Instruction)
|
||||
}
|
||||
Statement::Label(label) => {
|
||||
Statement::Label(visitor.visit_ident(label, None, false, false)?)
|
||||
}
|
||||
Statement::Variable(var) => {
|
||||
let name = visitor.visit_ident(
|
||||
var.name,
|
||||
Some((&var.v_type, var.state_space)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
Statement::Variable(ast::Variable {
|
||||
align: var.align,
|
||||
v_type: var.v_type,
|
||||
state_space: var.state_space,
|
||||
name,
|
||||
array_init: var.array_init,
|
||||
})
|
||||
}
|
||||
Statement::Conditional(conditional) => {
|
||||
let predicate = visitor.visit_ident(
|
||||
conditional.predicate,
|
||||
Some((&ast::ScalarType::Pred.into(), ast::StateSpace::Reg)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
let if_true = visitor.visit_ident(conditional.if_true, None, false, false)?;
|
||||
let if_false = visitor.visit_ident(conditional.if_false, None, false, false)?;
|
||||
Statement::Conditional(BrachCondition {
|
||||
predicate,
|
||||
if_true,
|
||||
if_false,
|
||||
})
|
||||
}
|
||||
Statement::Conversion(ImplicitConversion {
|
||||
src,
|
||||
dst,
|
||||
from_type,
|
||||
to_type,
|
||||
from_space,
|
||||
to_space,
|
||||
kind,
|
||||
}) => {
|
||||
let dst = visitor.visit_ident(
|
||||
dst,
|
||||
Some((&to_type, ast::StateSpace::Reg)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
let src = visitor.visit_ident(
|
||||
src,
|
||||
Some((&from_type, ast::StateSpace::Reg)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
Statement::Conversion(ImplicitConversion {
|
||||
src,
|
||||
dst,
|
||||
from_type,
|
||||
to_type,
|
||||
from_space,
|
||||
to_space,
|
||||
kind,
|
||||
})
|
||||
}
|
||||
Statement::Constant(ConstantDefinition { dst, typ, value }) => {
|
||||
let dst = visitor.visit_ident(
|
||||
dst,
|
||||
Some((&typ.into(), ast::StateSpace::Reg)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
Statement::Constant(ConstantDefinition { dst, typ, value })
|
||||
}
|
||||
Statement::RetValue(data, value) => {
|
||||
let value = value
|
||||
.into_iter()
|
||||
.map(|(ident, type_)| {
|
||||
Ok((
|
||||
visitor.visit_ident(
|
||||
ident,
|
||||
Some((&type_, ast::StateSpace::Local)),
|
||||
false,
|
||||
false,
|
||||
)?,
|
||||
type_,
|
||||
))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Statement::RetValue(data, value)
|
||||
}
|
||||
Statement::PtrAccess(PtrAccess {
|
||||
underlying_type,
|
||||
state_space,
|
||||
dst,
|
||||
ptr_src,
|
||||
offset_src,
|
||||
}) => {
|
||||
let dst =
|
||||
visitor.visit_ident(dst, Some((&underlying_type, state_space)), true, false)?;
|
||||
let ptr_src = visitor.visit_ident(
|
||||
ptr_src,
|
||||
Some((&underlying_type, state_space)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
let offset_src = visitor.visit(
|
||||
offset_src,
|
||||
Some((
|
||||
&ast::Type::Scalar(ast::ScalarType::S64),
|
||||
ast::StateSpace::Reg,
|
||||
)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
Statement::PtrAccess(PtrAccess {
|
||||
underlying_type,
|
||||
state_space,
|
||||
dst,
|
||||
ptr_src,
|
||||
offset_src,
|
||||
})
|
||||
}
|
||||
Statement::VectorRead(VectorRead {
|
||||
scalar_type,
|
||||
vector_width,
|
||||
scalar_dst: dst,
|
||||
vector_src,
|
||||
member,
|
||||
}) => {
|
||||
let scalar_t = scalar_type.into();
|
||||
let vector_t = ast::Type::Vector(vector_width, scalar_type);
|
||||
let dst: SpirvWord = visitor.visit_ident(
|
||||
dst,
|
||||
Some((&scalar_t, ast::StateSpace::Reg)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
let src = visitor.visit_ident(
|
||||
vector_src,
|
||||
Some((&vector_t, ast::StateSpace::Reg)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
Statement::VectorRead(VectorRead {
|
||||
scalar_type,
|
||||
vector_width,
|
||||
scalar_dst: dst,
|
||||
vector_src: src,
|
||||
member,
|
||||
})
|
||||
}
|
||||
Statement::VectorWrite(VectorWrite {
|
||||
scalar_type,
|
||||
vector_width,
|
||||
vector_dst,
|
||||
vector_src,
|
||||
scalar_src,
|
||||
member,
|
||||
}) => {
|
||||
let scalar_t = scalar_type.into();
|
||||
let vector_t = ast::Type::Vector(vector_width, scalar_type);
|
||||
let vector_dst = visitor.visit_ident(
|
||||
vector_dst,
|
||||
Some((&vector_t, ast::StateSpace::Reg)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
let vector_src = visitor.visit_ident(
|
||||
vector_src,
|
||||
Some((&vector_t, ast::StateSpace::Reg)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
let scalar_src = visitor.visit_ident(
|
||||
scalar_src,
|
||||
Some((&scalar_t, ast::StateSpace::Reg)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
Statement::VectorWrite(VectorWrite {
|
||||
vector_dst,
|
||||
vector_src,
|
||||
scalar_src,
|
||||
scalar_type,
|
||||
vector_width,
|
||||
member,
|
||||
})
|
||||
}
|
||||
Statement::RepackVector(RepackVectorDetails {
|
||||
is_extract,
|
||||
typ,
|
||||
packed,
|
||||
unpacked,
|
||||
relaxed_type_check,
|
||||
}) => {
|
||||
let (packed, unpacked) = if is_extract {
|
||||
let unpacked = unpacked
|
||||
.into_iter()
|
||||
.map(|ident| {
|
||||
visitor.visit_ident(
|
||||
ident,
|
||||
Some((&typ.into(), ast::StateSpace::Reg)),
|
||||
true,
|
||||
relaxed_type_check,
|
||||
)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let packed = visitor.visit_ident(
|
||||
packed,
|
||||
Some((
|
||||
&ast::Type::Vector(unpacked.len() as u8, typ),
|
||||
ast::StateSpace::Reg,
|
||||
)),
|
||||
false,
|
||||
false,
|
||||
)?;
|
||||
(packed, unpacked)
|
||||
} else {
|
||||
let packed = visitor.visit_ident(
|
||||
packed,
|
||||
Some((
|
||||
&ast::Type::Vector(unpacked.len() as u8, typ),
|
||||
ast::StateSpace::Reg,
|
||||
)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
let unpacked = unpacked
|
||||
.into_iter()
|
||||
.map(|ident| {
|
||||
visitor.visit_ident(
|
||||
ident,
|
||||
Some((&typ.into(), ast::StateSpace::Reg)),
|
||||
false,
|
||||
relaxed_type_check,
|
||||
)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
(packed, unpacked)
|
||||
};
|
||||
Statement::RepackVector(RepackVectorDetails {
|
||||
is_extract,
|
||||
typ,
|
||||
packed,
|
||||
unpacked,
|
||||
relaxed_type_check,
|
||||
})
|
||||
}
|
||||
Statement::FunctionPointer(FunctionPointerDetails { dst, src }) => {
|
||||
let dst = visitor.visit_ident(
|
||||
dst,
|
||||
Some((
|
||||
&ast::Type::Scalar(ast::ScalarType::U64),
|
||||
ast::StateSpace::Reg,
|
||||
)),
|
||||
true,
|
||||
false,
|
||||
)?;
|
||||
let src = visitor.visit_ident(src, None, false, false)?;
|
||||
Statement::FunctionPointer(FunctionPointerDetails { dst, src })
|
||||
}
|
||||
Statement::SetMode(mode_register) => Statement::SetMode(mode_register),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct BrachCondition {
|
||||
predicate: SpirvWord,
|
||||
if_true: SpirvWord,
|
||||
if_false: SpirvWord,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ImplicitConversion {
|
||||
src: SpirvWord,
|
||||
dst: SpirvWord,
|
||||
from_type: ast::Type,
|
||||
to_type: ast::Type,
|
||||
from_space: ast::StateSpace,
|
||||
to_space: ast::StateSpace,
|
||||
kind: ConversionKind,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Clone)]
|
||||
enum ConversionKind {
|
||||
Default,
|
||||
// zero-extend/chop/bitcast depending on types
|
||||
SignExtend,
|
||||
BitToPtr,
|
||||
PtrToPtr,
|
||||
AddressOf,
|
||||
}
|
||||
|
||||
struct ConstantDefinition {
|
||||
pub dst: SpirvWord,
|
||||
pub typ: ast::ScalarType,
|
||||
pub value: ast::ImmediateValue,
|
||||
}
|
||||
|
||||
pub struct PtrAccess<T> {
|
||||
underlying_type: ast::Type,
|
||||
state_space: ast::StateSpace,
|
||||
dst: SpirvWord,
|
||||
ptr_src: SpirvWord,
|
||||
offset_src: T,
|
||||
}
|
||||
|
||||
struct RepackVectorDetails {
|
||||
is_extract: bool,
|
||||
typ: ast::ScalarType,
|
||||
packed: SpirvWord,
|
||||
unpacked: Vec<SpirvWord>,
|
||||
relaxed_type_check: bool,
|
||||
}
|
||||
|
||||
struct FunctionPointerDetails {
|
||||
dst: SpirvWord,
|
||||
src: SpirvWord,
|
||||
}
|
||||
|
||||
/// Numeric identifier used by the passes in place of textual PTX names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SpirvWord(u32);

impl From<u32> for SpirvWord {
    /// Wraps a raw id.
    fn from(value: u32) -> Self {
        SpirvWord(value)
    }
}

impl From<SpirvWord> for u32 {
    /// Unwraps the raw id.
    fn from(value: SpirvWord) -> Self {
        let SpirvWord(raw) = value;
        raw
    }
}
|
||||
|
||||
// A `SpirvWord` is its own identifier type, so it can be used directly as
// an operand.
impl ast::Operand for SpirvWord {
    type Ident = SpirvWord;

    fn from_ident(ident: SpirvWord) -> SpirvWord {
        ident
    }
}
|
||||
|
||||
type ExpandedStatement = Statement<ast::Instruction<SpirvWord>, SpirvWord>;
|
||||
|
||||
type NormalizedStatement = Statement<
|
||||
(
|
||||
Option<ast::PredAt<SpirvWord>>,
|
||||
ast::Instruction<ast::ParsedOperand<SpirvWord>>,
|
||||
),
|
||||
ast::ParsedOperand<SpirvWord>,
|
||||
>;
|
||||
|
||||
enum Directive2<Instruction, Operand: ast::Operand> {
|
||||
Variable(ast::LinkingDirective, ast::Variable<SpirvWord>),
|
||||
Method(Function2<Instruction, Operand>),
|
||||
}
|
||||
|
||||
struct Function2<Instruction, Operand: ast::Operand> {
|
||||
pub return_arguments: Vec<ast::Variable<Operand::Ident>>,
|
||||
pub name: Operand::Ident,
|
||||
pub input_arguments: Vec<ast::Variable<Operand::Ident>>,
|
||||
pub body: Option<Vec<Statement<Instruction, Operand>>>,
|
||||
is_kernel: bool,
|
||||
import_as: Option<String>,
|
||||
tuning: Vec<ast::TuningDirective>,
|
||||
linkage: ast::LinkingDirective,
|
||||
flush_to_zero_f32: bool,
|
||||
flush_to_zero_f16f64: bool,
|
||||
rounding_mode_f32: ast::RoundingMode,
|
||||
rounding_mode_f16f64: ast::RoundingMode,
|
||||
}
|
||||
|
||||
type NormalizedDirective2 = Directive2<
|
||||
(
|
||||
Option<ast::PredAt<SpirvWord>>,
|
||||
ast::Instruction<ast::ParsedOperand<SpirvWord>>,
|
||||
),
|
||||
ast::ParsedOperand<SpirvWord>,
|
||||
>;
|
||||
|
||||
type NormalizedFunction2 = Function2<
|
||||
(
|
||||
Option<ast::PredAt<SpirvWord>>,
|
||||
ast::Instruction<ast::ParsedOperand<SpirvWord>>,
|
||||
),
|
||||
ast::ParsedOperand<SpirvWord>,
|
||||
>;
|
||||
|
||||
type UnconditionalDirective =
|
||||
Directive2<ast::Instruction<ast::ParsedOperand<SpirvWord>>, ast::ParsedOperand<SpirvWord>>;
|
||||
|
||||
type UnconditionalFunction =
|
||||
Function2<ast::Instruction<ast::ParsedOperand<SpirvWord>>, ast::ParsedOperand<SpirvWord>>;
|
||||
|
||||
struct GlobalStringIdentResolver2<'input> {
|
||||
pub(crate) current_id: SpirvWord,
|
||||
pub(crate) ident_map: FxHashMap<SpirvWord, IdentEntry<'input>>,
|
||||
}
|
||||
|
||||
impl<'input> GlobalStringIdentResolver2<'input> {
|
||||
fn new(spirv_word: SpirvWord) -> Self {
|
||||
Self {
|
||||
current_id: spirv_word,
|
||||
ident_map: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn register_named(
|
||||
&mut self,
|
||||
name: Cow<'input, str>,
|
||||
type_space: Option<(ast::Type, ast::StateSpace)>,
|
||||
) -> SpirvWord {
|
||||
let new_id = self.current_id;
|
||||
self.ident_map.insert(
|
||||
new_id,
|
||||
IdentEntry {
|
||||
name: Some(name),
|
||||
type_space,
|
||||
},
|
||||
);
|
||||
self.current_id.0 += 1;
|
||||
new_id
|
||||
}
|
||||
|
||||
fn register_unnamed(&mut self, type_space: Option<(ast::Type, ast::StateSpace)>) -> SpirvWord {
|
||||
let new_id = self.current_id;
|
||||
self.ident_map.insert(
|
||||
new_id,
|
||||
IdentEntry {
|
||||
name: None,
|
||||
type_space,
|
||||
},
|
||||
);
|
||||
self.current_id.0 += 1;
|
||||
new_id
|
||||
}
|
||||
|
||||
fn get_typed(&self, id: SpirvWord) -> Result<&(ast::Type, ast::StateSpace), TranslateError> {
|
||||
match self.ident_map.get(&id) {
|
||||
Some(IdentEntry {
|
||||
type_space: Some(type_space),
|
||||
..
|
||||
}) => Ok(type_space),
|
||||
_ => Err(error_unknown_symbol()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct IdentEntry<'input> {
|
||||
name: Option<Cow<'input, str>>,
|
||||
type_space: Option<(ast::Type, ast::StateSpace)>,
|
||||
}
|
||||
|
||||
struct ScopedResolver<'input, 'b> {
|
||||
flat_resolver: &'b mut GlobalStringIdentResolver2<'input>,
|
||||
scopes: Vec<ScopeMarker<'input>>,
|
||||
}
|
||||
|
||||
impl<'input, 'b> ScopedResolver<'input, 'b> {
|
||||
fn new(flat_resolver: &'b mut GlobalStringIdentResolver2<'input>) -> Self {
|
||||
Self {
|
||||
flat_resolver,
|
||||
scopes: vec![ScopeMarker::new()],
|
||||
}
|
||||
}
|
||||
|
||||
fn start_scope(&mut self) {
|
||||
self.scopes.push(ScopeMarker::new());
|
||||
}
|
||||
|
||||
fn end_scope(&mut self) {
|
||||
let scope = self.scopes.pop().unwrap();
|
||||
scope.flush(self.flat_resolver);
|
||||
}
|
||||
|
||||
fn add_or_get_in_current_scope_untyped(
|
||||
&mut self,
|
||||
name: &'input str,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
let current_scope = self.scopes.last_mut().unwrap();
|
||||
Ok(
|
||||
match current_scope.name_to_ident.entry(Cow::Borrowed(name)) {
|
||||
hash_map::Entry::Occupied(occupied_entry) => {
|
||||
let ident = *occupied_entry.get();
|
||||
let entry = current_scope
|
||||
.ident_map
|
||||
.get(&ident)
|
||||
.ok_or_else(|| error_unreachable())?;
|
||||
if entry.type_space.is_some() {
|
||||
return Err(error_unknown_symbol());
|
||||
}
|
||||
ident
|
||||
}
|
||||
hash_map::Entry::Vacant(vacant_entry) => {
|
||||
let new_id = self.flat_resolver.current_id;
|
||||
self.flat_resolver.current_id.0 += 1;
|
||||
vacant_entry.insert(new_id);
|
||||
current_scope.ident_map.insert(
|
||||
new_id,
|
||||
IdentEntry {
|
||||
name: Some(Cow::Borrowed(name)),
|
||||
type_space: None,
|
||||
},
|
||||
);
|
||||
new_id
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn add(
|
||||
&mut self,
|
||||
name: Cow<'input, str>,
|
||||
type_space: Option<(ast::Type, ast::StateSpace)>,
|
||||
) -> Result<SpirvWord, TranslateError> {
|
||||
let result = self.flat_resolver.current_id;
|
||||
self.flat_resolver.current_id.0 += 1;
|
||||
let current_scope = self.scopes.last_mut().unwrap();
|
||||
if current_scope
|
||||
.name_to_ident
|
||||
.insert(name.clone(), result)
|
||||
.is_some()
|
||||
{
|
||||
return Err(error_unknown_symbol());
|
||||
}
|
||||
current_scope.ident_map.insert(
|
||||
result,
|
||||
IdentEntry {
|
||||
name: Some(name),
|
||||
type_space,
|
||||
},
|
||||
);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn get(&mut self, name: &str) -> Result<SpirvWord, TranslateError> {
|
||||
self.scopes
|
||||
.iter()
|
||||
.rev()
|
||||
.find_map(|resolver| resolver.name_to_ident.get(name).copied())
|
||||
.ok_or_else(|| error_unreachable())
|
||||
}
|
||||
|
||||
fn get_in_current_scope(&self, label: &'input str) -> Result<SpirvWord, TranslateError> {
|
||||
let current_scope = self.scopes.last().unwrap();
|
||||
current_scope
|
||||
.name_to_ident
|
||||
.get(label)
|
||||
.copied()
|
||||
.ok_or_else(|| error_unreachable())
|
||||
}
|
||||
}
|
||||
|
||||
struct ScopeMarker<'input> {
|
||||
ident_map: FxHashMap<SpirvWord, IdentEntry<'input>>,
|
||||
name_to_ident: FxHashMap<Cow<'input, str>, SpirvWord>,
|
||||
}
|
||||
|
||||
impl<'input> ScopeMarker<'input> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
ident_map: FxHashMap::default(),
|
||||
name_to_ident: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(self, resolver: &mut GlobalStringIdentResolver2<'input>) {
|
||||
resolver.ident_map.extend(self.ident_map);
|
||||
}
|
||||
}
|
||||
|
||||
struct SpecialRegistersMap2 {
|
||||
reg_to_id: FxHashMap<PtxSpecialRegister, SpirvWord>,
|
||||
id_to_reg: FxHashMap<SpirvWord, PtxSpecialRegister>,
|
||||
}
|
||||
|
||||
impl SpecialRegistersMap2 {
|
||||
fn new(resolver: &mut ScopedResolver) -> Result<Self, TranslateError> {
|
||||
let mut result = SpecialRegistersMap2 {
|
||||
reg_to_id: FxHashMap::default(),
|
||||
id_to_reg: FxHashMap::default(),
|
||||
};
|
||||
for sreg in PtxSpecialRegister::iter() {
|
||||
let text = sreg.as_str();
|
||||
let id = resolver.add(
|
||||
Cow::Borrowed(text),
|
||||
Some((sreg.get_type(), ast::StateSpace::Reg)),
|
||||
)?;
|
||||
result.reg_to_id.insert(sreg, id);
|
||||
result.id_to_reg.insert(id, sreg);
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn get(&self, id: SpirvWord) -> Option<PtxSpecialRegister> {
|
||||
self.id_to_reg.get(&id).copied()
|
||||
}
|
||||
|
||||
fn len() -> usize {
|
||||
PtxSpecialRegister::iter().len()
|
||||
}
|
||||
|
||||
fn foreach_declaration<'a, 'input>(
|
||||
resolver: &'a mut GlobalStringIdentResolver2<'input>,
|
||||
mut fn_: impl FnMut(
|
||||
PtxSpecialRegister,
|
||||
(
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
SpirvWord,
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
),
|
||||
),
|
||||
) {
|
||||
for sreg in PtxSpecialRegister::iter() {
|
||||
let external_fn_name = [ZLUDA_PTX_PREFIX, sreg.get_unprefixed_function_name()].concat();
|
||||
let name = resolver.register_named(Cow::Owned(external_fn_name), None);
|
||||
let return_type = sreg.get_function_return_type();
|
||||
let input_type = sreg.get_function_input_type();
|
||||
let return_arguments = vec![ast::Variable {
|
||||
align: None,
|
||||
v_type: return_type.into(),
|
||||
state_space: ast::StateSpace::Reg,
|
||||
name: resolver.register_unnamed(Some((return_type.into(), ast::StateSpace::Reg))),
|
||||
array_init: Vec::new(),
|
||||
}];
|
||||
let input_arguments = input_type
|
||||
.into_iter()
|
||||
.map(|type_| ast::Variable {
|
||||
align: None,
|
||||
v_type: type_.into(),
|
||||
state_space: ast::StateSpace::Reg,
|
||||
name: resolver.register_unnamed(Some((type_.into(), ast::StateSpace::Reg))),
|
||||
array_init: Vec::new(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
fn_(sreg, (return_arguments, name, input_arguments));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct VectorRead {
|
||||
scalar_type: ast::ScalarType,
|
||||
vector_width: u8,
|
||||
scalar_dst: SpirvWord,
|
||||
vector_src: SpirvWord,
|
||||
member: u8,
|
||||
}
|
||||
|
||||
pub struct VectorWrite {
|
||||
scalar_type: ast::ScalarType,
|
||||
vector_width: u8,
|
||||
vector_dst: SpirvWord,
|
||||
vector_src: SpirvWord,
|
||||
scalar_src: SpirvWord,
|
||||
member: u8,
|
||||
}
|
||||
|
||||
fn scalar_to_ptx_name(this: ast::ScalarType) -> &'static str {
|
||||
match this {
|
||||
ast::ScalarType::B8 => "b8",
|
||||
ast::ScalarType::B16 => "b16",
|
||||
ast::ScalarType::B32 => "b32",
|
||||
ast::ScalarType::B64 => "b64",
|
||||
ast::ScalarType::B128 => "b128",
|
||||
ast::ScalarType::U8 => "u8",
|
||||
ast::ScalarType::U16 => "u16",
|
||||
ast::ScalarType::U16x2 => "u16x2",
|
||||
ast::ScalarType::U32 => "u32",
|
||||
ast::ScalarType::U64 => "u64",
|
||||
ast::ScalarType::S8 => "s8",
|
||||
ast::ScalarType::S16 => "s16",
|
||||
ast::ScalarType::S16x2 => "s16x2",
|
||||
ast::ScalarType::S32 => "s32",
|
||||
ast::ScalarType::S64 => "s64",
|
||||
ast::ScalarType::F16 => "f16",
|
||||
ast::ScalarType::F16x2 => "f16x2",
|
||||
ast::ScalarType::F32 => "f32",
|
||||
ast::ScalarType::F64 => "f64",
|
||||
ast::ScalarType::BF16 => "bf16",
|
||||
ast::ScalarType::BF16x2 => "bf16x2",
|
||||
ast::ScalarType::Pred => "pred",
|
||||
}
|
||||
}
|
||||
|
||||
type UnconditionalStatement =
|
||||
Statement<ast::Instruction<ast::ParsedOperand<SpirvWord>>, ast::ParsedOperand<SpirvWord>>;
|
134
ptx/src/pass/normalize_basic_blocks.rs
Normal file
134
ptx/src/pass/normalize_basic_blocks.rs
Normal file
@ -0,0 +1,134 @@
|
||||
use super::*;
|
||||
|
||||
// This pass normalizes ptx modules in two ways that makes mode computation pass
|
||||
// and code emissions passes much simpler:
|
||||
// * Inserts label at the start of every function
|
||||
// This makes control flow graph simpler in mode computation block: we can
|
||||
// represent kernels as separate nodes with its own separate entry/exit mode
|
||||
// * Inserts label at the start of every basic block
|
||||
// * Insert explicit jumps before labels
|
||||
// * Non-.entry methods get a single `ret;` exit point - this is because mode computation
|
||||
// logic requires it. Control flow graph constructed by mode computation
|
||||
// models function calls as jumps into and then from another function.
|
||||
// If this cfg allowed multiple return basic blocks then there would be cases
|
||||
// where we want to insert mode setting instruction along the edge between
|
||||
// `ret;` and bb in the caller. This is only possible if there's a single
|
||||
// edge between from function `ret;` and caller
|
||||
pub(crate) fn run(
|
||||
flat_resolver: &mut GlobalStringIdentResolver2<'_>,
|
||||
mut directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
for directive in directives.iter_mut() {
|
||||
let (body_ref, is_kernel) = match directive {
|
||||
Directive2::Method(Function2 {
|
||||
body: Some(body), is_kernel, ..
|
||||
}) => (body, *is_kernel),
|
||||
_ => continue,
|
||||
};
|
||||
let body = std::mem::replace(body_ref, Vec::new());
|
||||
let mut result = Vec::with_capacity(body.len());
|
||||
let mut previous_instruction_was_terminator = TerminatorKind::Not;
|
||||
let mut body_iterator = body.into_iter();
|
||||
let mut return_statements = Vec::new();
|
||||
match body_iterator.next() {
|
||||
Some(Statement::Label(_)) => {}
|
||||
Some(statement) => {
|
||||
result.push(Statement::Label(flat_resolver.register_unnamed(None)));
|
||||
result.push(statement);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
for statement in body_iterator {
|
||||
match previous_instruction_was_terminator {
|
||||
TerminatorKind::Not => match statement {
|
||||
Statement::Label(label) => {
|
||||
result.push(Statement::Instruction(ast::Instruction::Bra {
|
||||
arguments: ast::BraArgs { src: label },
|
||||
}))
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
TerminatorKind::Real => {
|
||||
if !matches!(statement, Statement::Label(..)) {
|
||||
result.push(Statement::Label(flat_resolver.register_unnamed(None)));
|
||||
}
|
||||
}
|
||||
TerminatorKind::Fake => match statement {
|
||||
// If there's a label after a call just reuse it
|
||||
Statement::Label(label) => {
|
||||
result.push(Statement::Instruction(ast::Instruction::Bra {
|
||||
arguments: ast::BraArgs { src: label },
|
||||
}))
|
||||
}
|
||||
_ => {
|
||||
let label = flat_resolver.register_unnamed(None);
|
||||
result.push(Statement::Instruction(ast::Instruction::Bra {
|
||||
arguments: ast::BraArgs { src: label },
|
||||
}));
|
||||
result.push(Statement::Label(label));
|
||||
}
|
||||
},
|
||||
}
|
||||
match statement {
|
||||
Statement::RetValue(..) => {
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Ret { .. }) => {
|
||||
if !is_kernel {
|
||||
return_statements.push(result.len());
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
previous_instruction_was_terminator = is_block_terminator(&statement);
|
||||
result.push(statement);
|
||||
}
|
||||
convert_from_multiple_returns_to_single_return(
|
||||
flat_resolver,
|
||||
&mut result,
|
||||
return_statements,
|
||||
)?;
|
||||
*body_ref = result;
|
||||
}
|
||||
Ok(directives)
|
||||
}
|
||||
|
||||
// Classification returned by `is_block_terminator`
enum TerminatorKind {
    // Statement does not end a basic block
    Not,
    // Conditional, `bra` or `ret`
    Real,
    // `call`: treated as ending a basic block by this pass
    Fake,
}
|
||||
|
||||
fn convert_from_multiple_returns_to_single_return(
|
||||
flat_resolver: &mut GlobalStringIdentResolver2<'_>,
|
||||
result: &mut Vec<Statement<ptx_parser::Instruction<SpirvWord>, SpirvWord>>,
|
||||
return_statements: Vec<usize>,
|
||||
) -> Result<(), TranslateError> {
|
||||
Ok(if return_statements.len() > 1 {
|
||||
let ret_bb = flat_resolver.register_unnamed(None);
|
||||
result.push(Statement::Label(ret_bb));
|
||||
result.push(Statement::Instruction(ast::Instruction::Ret {
|
||||
data: ast::RetData { uniform: false },
|
||||
}));
|
||||
for ret_index in return_statements {
|
||||
let statement = result.get_mut(ret_index).ok_or_else(error_unreachable)?;
|
||||
*statement = Statement::Instruction(ast::Instruction::Bra {
|
||||
arguments: ast::BraArgs { src: ret_bb },
|
||||
});
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn is_block_terminator(
|
||||
statement: &Statement<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> TerminatorKind {
|
||||
match statement {
|
||||
Statement::Conditional(..)
|
||||
| Statement::Instruction(ast::Instruction::Bra { .. })
|
||||
// Normally call is not a terminator, but we treat it as such because it
|
||||
// makes the "instruction modes to global modes" pass possible
|
||||
| Statement::Instruction(ast::Instruction::Ret { .. }) => TerminatorKind::Real,
|
||||
Statement::Instruction(ast::Instruction::Call { .. }) => TerminatorKind::Fake,
|
||||
_ => TerminatorKind::Not,
|
||||
}
|
||||
}
|
194
ptx/src/pass/normalize_identifiers2.rs
Normal file
194
ptx/src/pass/normalize_identifiers2.rs
Normal file
@ -0,0 +1,194 @@
|
||||
use super::*;
|
||||
use ptx_parser as ast;
|
||||
|
||||
pub(crate) fn run<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
directives: Vec<ast::Directive<'input, ast::ParsedOperand<&'input str>>>,
|
||||
) -> Result<Vec<NormalizedDirective2>, TranslateError> {
|
||||
resolver.start_scope();
|
||||
let result = directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, directive))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
resolver.end_scope();
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn run_directive<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
directive: ast::Directive<'input, ast::ParsedOperand<&'input str>>,
|
||||
) -> Result<NormalizedDirective2, TranslateError> {
|
||||
Ok(match directive {
|
||||
ast::Directive::Variable(linking, var) => {
|
||||
NormalizedDirective2::Variable(linking, run_variable(resolver, var)?)
|
||||
}
|
||||
ast::Directive::Method(linking, directive) => {
|
||||
NormalizedDirective2::Method(run_method(resolver, linking, directive)?)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
linkage: ast::LinkingDirective,
|
||||
method: ast::Function<'input, &'input str, ast::Statement<ast::ParsedOperand<&'input str>>>,
|
||||
) -> Result<NormalizedFunction2, TranslateError> {
|
||||
let is_kernel = method.func_directive.name.is_kernel();
|
||||
let name = resolver.add_or_get_in_current_scope_untyped(method.func_directive.name.text())?;
|
||||
resolver.start_scope();
|
||||
let (return_arguments, input_arguments) = run_function_decl(resolver, method.func_directive)?;
|
||||
let body = method
|
||||
.body
|
||||
.map(|statements| {
|
||||
let mut result = Vec::with_capacity(statements.len());
|
||||
run_statements(resolver, &mut result, statements)?;
|
||||
Ok::<_, TranslateError>(result)
|
||||
})
|
||||
.transpose()?;
|
||||
resolver.end_scope();
|
||||
Ok(Function2 {
|
||||
return_arguments,
|
||||
name,
|
||||
input_arguments,
|
||||
body,
|
||||
import_as: None,
|
||||
linkage,
|
||||
is_kernel,
|
||||
tuning: method.tuning,
|
||||
flush_to_zero_f32: false,
|
||||
flush_to_zero_f16f64: false,
|
||||
rounding_mode_f32: ptx_parser::RoundingMode::NearestEven,
|
||||
rounding_mode_f16f64: ptx_parser::RoundingMode::NearestEven,
|
||||
})
|
||||
}
|
||||
|
||||
fn run_function_decl<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
func_directive: ast::MethodDeclaration<'input, &'input str>,
|
||||
) -> Result<(Vec<ast::Variable<SpirvWord>>, Vec<ast::Variable<SpirvWord>>), TranslateError> {
|
||||
assert!(func_directive.shared_mem.is_none());
|
||||
let return_arguments = func_directive
|
||||
.return_arguments
|
||||
.into_iter()
|
||||
.map(|var| run_variable(resolver, var))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let input_arguments = func_directive
|
||||
.input_arguments
|
||||
.into_iter()
|
||||
.map(|var| run_variable(resolver, var))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok((return_arguments, input_arguments))
|
||||
}
|
||||
|
||||
fn run_variable<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
variable: ast::Variable<&'input str>,
|
||||
) -> Result<ast::Variable<SpirvWord>, TranslateError> {
|
||||
Ok(ast::Variable {
|
||||
name: resolver.add(
|
||||
Cow::Borrowed(variable.name),
|
||||
Some((variable.v_type.clone(), variable.state_space)),
|
||||
)?,
|
||||
align: variable.align,
|
||||
v_type: variable.v_type,
|
||||
state_space: variable.state_space,
|
||||
array_init: variable.array_init,
|
||||
})
|
||||
}
|
||||
|
||||
fn run_statements<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
result: &mut Vec<NormalizedStatement>,
|
||||
statements: Vec<ast::Statement<ast::ParsedOperand<&'input str>>>,
|
||||
) -> Result<(), TranslateError> {
|
||||
for statement in statements.iter() {
|
||||
match statement {
|
||||
ast::Statement::Label(label) => {
|
||||
resolver.add(Cow::Borrowed(*label), None)?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
for statement in statements {
|
||||
match statement {
|
||||
ast::Statement::Label(label) => {
|
||||
result.push(Statement::Label(resolver.get_in_current_scope(label)?))
|
||||
}
|
||||
ast::Statement::Variable(variable) => run_multivariable(resolver, result, variable)?,
|
||||
ast::Statement::Instruction(predicate, instruction) => {
|
||||
result.push(Statement::Instruction((
|
||||
predicate
|
||||
.map(|pred| {
|
||||
Ok::<_, TranslateError>(ast::PredAt {
|
||||
not: pred.not,
|
||||
label: resolver.get(pred.label)?,
|
||||
})
|
||||
})
|
||||
.transpose()?,
|
||||
run_instruction(resolver, instruction)?,
|
||||
)))
|
||||
}
|
||||
ast::Statement::Block(block) => {
|
||||
resolver.start_scope();
|
||||
run_statements(resolver, result, block)?;
|
||||
resolver.end_scope();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_instruction<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
instruction: ast::Instruction<ast::ParsedOperand<&'input str>>,
|
||||
) -> Result<ast::Instruction<ast::ParsedOperand<SpirvWord>>, TranslateError> {
|
||||
ast::visit_map(instruction, &mut |name: &'input str,
|
||||
_: Option<(
|
||||
&ast::Type,
|
||||
ast::StateSpace,
|
||||
)>,
|
||||
_,
|
||||
_| {
|
||||
resolver.get(&name)
|
||||
})
|
||||
}
|
||||
|
||||
fn run_multivariable<'input, 'b>(
|
||||
resolver: &mut ScopedResolver<'input, 'b>,
|
||||
result: &mut Vec<NormalizedStatement>,
|
||||
variable: ast::MultiVariable<&'input str>,
|
||||
) -> Result<(), TranslateError> {
|
||||
match variable.count {
|
||||
Some(count) => {
|
||||
for i in 0..count {
|
||||
let name = Cow::Owned(format!("{}{}", variable.var.name, i));
|
||||
let ident = resolver.add(
|
||||
name,
|
||||
Some((variable.var.v_type.clone(), variable.var.state_space)),
|
||||
)?;
|
||||
result.push(Statement::Variable(ast::Variable {
|
||||
align: variable.var.align,
|
||||
v_type: variable.var.v_type.clone(),
|
||||
state_space: variable.var.state_space,
|
||||
name: ident,
|
||||
array_init: variable.var.array_init.clone(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let name = Cow::Borrowed(variable.var.name);
|
||||
let ident = resolver.add(
|
||||
name,
|
||||
Some((variable.var.v_type.clone(), variable.var.state_space)),
|
||||
)?;
|
||||
result.push(Statement::Variable(ast::Variable {
|
||||
align: variable.var.align,
|
||||
v_type: variable.var.v_type.clone(),
|
||||
state_space: variable.var.state_space,
|
||||
name: ident,
|
||||
array_init: variable.var.array_init.clone(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
90
ptx/src/pass/normalize_predicates2.rs
Normal file
90
ptx/src/pass/normalize_predicates2.rs
Normal file
@ -0,0 +1,90 @@
|
||||
use super::*;
|
||||
use ptx_parser as ast;
|
||||
|
||||
pub(crate) fn run<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<NormalizedDirective2>,
|
||||
) -> Result<Vec<UnconditionalDirective>, TranslateError> {
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, directive))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_directive<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directive: NormalizedDirective2,
|
||||
) -> Result<UnconditionalDirective, TranslateError> {
|
||||
Ok(match directive {
|
||||
Directive2::Variable(linking, var) => Directive2::Variable(linking, var),
|
||||
Directive2::Method(method) => Directive2::Method(run_method(resolver, method)?),
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
method: NormalizedFunction2,
|
||||
) -> Result<UnconditionalFunction, TranslateError> {
|
||||
let body = method
|
||||
.body
|
||||
.map(|statements| {
|
||||
let mut result = Vec::with_capacity(statements.len());
|
||||
for statement in statements {
|
||||
run_statement(resolver, &mut result, statement)?;
|
||||
}
|
||||
Ok::<_, TranslateError>(result)
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Function2 {
|
||||
body,
|
||||
return_arguments: method.return_arguments,
|
||||
name: method.name,
|
||||
input_arguments: method.input_arguments,
|
||||
import_as: method.import_as,
|
||||
tuning: method.tuning,
|
||||
linkage: method.linkage,
|
||||
is_kernel: method.is_kernel,
|
||||
flush_to_zero_f32: method.flush_to_zero_f32,
|
||||
flush_to_zero_f16f64: method.flush_to_zero_f16f64,
|
||||
rounding_mode_f32: method.rounding_mode_f32,
|
||||
rounding_mode_f16f64: method.rounding_mode_f16f64,
|
||||
})
|
||||
}
|
||||
|
||||
fn run_statement<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
result: &mut Vec<UnconditionalStatement>,
|
||||
statement: NormalizedStatement,
|
||||
) -> Result<(), TranslateError> {
|
||||
Ok(match statement {
|
||||
Statement::Label(label) => result.push(Statement::Label(label)),
|
||||
Statement::Variable(var) => result.push(Statement::Variable(var)),
|
||||
Statement::Instruction((predicate, instruction)) => {
|
||||
if let Some(pred) = predicate {
|
||||
let if_true = resolver.register_unnamed(None);
|
||||
let if_false = resolver.register_unnamed(None);
|
||||
let folded_bra = match &instruction {
|
||||
ast::Instruction::Bra { arguments, .. } => Some(arguments.src),
|
||||
_ => None,
|
||||
};
|
||||
let mut branch = BrachCondition {
|
||||
predicate: pred.label,
|
||||
if_true: folded_bra.unwrap_or(if_true),
|
||||
if_false,
|
||||
};
|
||||
if pred.not {
|
||||
std::mem::swap(&mut branch.if_true, &mut branch.if_false);
|
||||
}
|
||||
result.push(Statement::Conditional(branch));
|
||||
if folded_bra.is_none() {
|
||||
result.push(Statement::Label(if_true));
|
||||
result.push(Statement::Instruction(instruction));
|
||||
}
|
||||
result.push(Statement::Label(if_false));
|
||||
} else {
|
||||
result.push(Statement::Instruction(instruction));
|
||||
}
|
||||
}
|
||||
_ => return Err(error_unreachable()),
|
||||
})
|
||||
}
|
122
ptx/src/pass/remove_unreachable_basic_blocks.rs
Normal file
122
ptx/src/pass/remove_unreachable_basic_blocks.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use super::*;
|
||||
use petgraph::{
|
||||
graph::NodeIndex,
|
||||
visit::{Bfs, VisitMap},
|
||||
Graph,
|
||||
};
|
||||
use rustc_hash::FxHashSet;
|
||||
|
||||
pub(crate) fn run(
|
||||
mut directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
let mut reachable_funcs = FxHashSet::default();
|
||||
for directive in directives.iter_mut() {
|
||||
match directive {
|
||||
Directive2::Method(Function2 {
|
||||
body: Some(body), ..
|
||||
}) => {
|
||||
let old_body = std::mem::replace(body, Vec::new());
|
||||
let mut cfg = ControlFlowGraph::new();
|
||||
let mut old_body_iter = old_body.iter();
|
||||
let mut current_bb = match old_body_iter.next() {
|
||||
Some(Statement::Label(label)) => cfg.add_or_get_node(*label),
|
||||
_ => return Err(error_unreachable()),
|
||||
};
|
||||
let first_bb = current_bb;
|
||||
for statement in old_body_iter {
|
||||
match statement {
|
||||
Statement::Label(label) => {
|
||||
current_bb = cfg.add_or_get_node(*label);
|
||||
}
|
||||
Statement::Conditional(branch) => {
|
||||
cfg.add_branch(current_bb, branch.if_true);
|
||||
cfg.add_branch(current_bb, branch.if_false);
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Bra {
|
||||
arguments: ast::BraArgs { src },
|
||||
}) => {
|
||||
cfg.add_branch(current_bb, *src);
|
||||
}
|
||||
Statement::FunctionPointer(FunctionPointerDetails {
|
||||
src: _func, ..
|
||||
}) => {
|
||||
return Err(error_todo());
|
||||
}
|
||||
Statement::Instruction(ast::Instruction::Call {
|
||||
arguments: ast::CallArgs { func, .. },
|
||||
..
|
||||
}) => {
|
||||
reachable_funcs.insert(*func);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let mut bfs = Bfs::new(&cfg.graph, first_bb);
|
||||
while let Some(_) = bfs.next(&cfg.graph) {}
|
||||
let mut visited = true;
|
||||
*body = try_filter_to_vec(old_body.into_iter(), |statement| {
|
||||
match statement {
|
||||
Statement::Label(label) => {
|
||||
visited = bfs
|
||||
.discovered
|
||||
.is_visited(cfg.nodes.get(label).ok_or_else(error_unreachable)?);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(visited)
|
||||
})?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(directives
|
||||
.into_iter()
|
||||
.filter(|directive| match directive {
|
||||
Directive2::Variable(..) => true,
|
||||
Directive2::Method(Function2 {
|
||||
name, is_kernel, ..
|
||||
}) => *is_kernel || reachable_funcs.contains(name),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
|
||||
/// Fallible counterpart of `iter.filter(..).collect::<Vec<_>>()`.
///
/// Calls `filter` on each item in order. Items for which it returns
/// `Ok(true)` are kept, items with `Ok(false)` are dropped, and the first
/// `Err` stops the iteration and is returned. The output vector is
/// pre-allocated to the length reported by the iterator.
fn try_filter_to_vec<T, E>(
    iter: impl ExactSizeIterator<Item = T>,
    mut filter: impl FnMut(&T) -> Result<bool, E>,
) -> Result<Vec<T>, E> {
    let mut kept = Vec::with_capacity(iter.len());
    for item in iter {
        if filter(&item)? {
            kept.push(item);
        }
    }
    Ok(kept)
}
|
||||
|
||||
struct ControlFlowGraph {
|
||||
graph: Graph<SpirvWord, ()>,
|
||||
nodes: FxHashMap<SpirvWord, NodeIndex>,
|
||||
}
|
||||
|
||||
impl ControlFlowGraph {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
graph: Graph::new(),
|
||||
nodes: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_or_get_node(&mut self, id: SpirvWord) -> NodeIndex {
|
||||
*self
|
||||
.nodes
|
||||
.entry(id)
|
||||
.or_insert_with(|| self.graph.add_node(id))
|
||||
}
|
||||
|
||||
fn add_branch(&mut self, from: NodeIndex, to: SpirvWord) -> NodeIndex {
|
||||
let to = self.add_or_get_node(to);
|
||||
self.graph.add_edge(from, to, ());
|
||||
to
|
||||
}
|
||||
}
|
191
ptx/src/pass/replace_instructions_with_function_calls.rs
Normal file
191
ptx/src/pass/replace_instructions_with_function_calls.rs
Normal file
@ -0,0 +1,191 @@
|
||||
use super::*;
|
||||
|
||||
pub(super) fn run<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
let mut fn_declarations = FxHashMap::default();
|
||||
let remapped_directives = directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(resolver, &mut fn_declarations, directive))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let mut result = fn_declarations
|
||||
.into_iter()
|
||||
.map(|(_, (return_arguments, name, input_arguments))| {
|
||||
Directive2::Method(Function2 {
|
||||
return_arguments,
|
||||
name: name,
|
||||
input_arguments,
|
||||
body: None,
|
||||
import_as: None,
|
||||
tuning: Vec::new(),
|
||||
linkage: ast::LinkingDirective::EXTERN,
|
||||
is_kernel: false,
|
||||
flush_to_zero_f32: false,
|
||||
flush_to_zero_f16f64: false,
|
||||
rounding_mode_f32: ptx_parser::RoundingMode::NearestEven,
|
||||
rounding_mode_f16f64: ptx_parser::RoundingMode::NearestEven,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
result.extend(remapped_directives);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn run_directive<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
fn_declarations: &mut FxHashMap<
|
||||
Cow<'input, str>,
|
||||
(
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
SpirvWord,
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
),
|
||||
>,
|
||||
directive: Directive2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
) -> Result<Directive2<ast::Instruction<SpirvWord>, SpirvWord>, TranslateError> {
|
||||
Ok(match directive {
|
||||
var @ Directive2::Variable(..) => var,
|
||||
Directive2::Method(mut method) => {
|
||||
method.body = method
|
||||
.body
|
||||
.map(|statements| run_statements(resolver, fn_declarations, statements))
|
||||
.transpose()?;
|
||||
Directive2::Method(method)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn run_statements<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
fn_declarations: &mut FxHashMap<
|
||||
Cow<'input, str>,
|
||||
(
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
SpirvWord,
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
),
|
||||
>,
|
||||
statements: Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
statements
|
||||
.into_iter()
|
||||
.map(|statement| {
|
||||
Ok(match statement {
|
||||
Statement::Instruction(instruction) => {
|
||||
Statement::Instruction(run_instruction(resolver, fn_declarations, instruction)?)
|
||||
}
|
||||
s => s,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_instruction<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
fn_declarations: &mut FxHashMap<
|
||||
Cow<'input, str>,
|
||||
(
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
SpirvWord,
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
),
|
||||
>,
|
||||
instruction: ptx_parser::Instruction<SpirvWord>,
|
||||
) -> Result<ptx_parser::Instruction<SpirvWord>, TranslateError> {
|
||||
Ok(match instruction {
|
||||
i @ ptx_parser::Instruction::Activemask { .. } => {
|
||||
to_call(resolver, fn_declarations, "activemask".into(), i)?
|
||||
}
|
||||
i @ ptx_parser::Instruction::Bfe { data, .. } => {
|
||||
let name = ["bfe_", scalar_to_ptx_name(data)].concat();
|
||||
to_call(resolver, fn_declarations, name.into(), i)?
|
||||
}
|
||||
i @ ptx_parser::Instruction::Bfi { data, .. } => {
|
||||
let name = ["bfi_", scalar_to_ptx_name(data)].concat();
|
||||
to_call(resolver, fn_declarations, name.into(), i)?
|
||||
}
|
||||
i @ ptx_parser::Instruction::Bar { .. } => {
|
||||
to_call(resolver, fn_declarations, "bar_sync".into(), i)?
|
||||
}
|
||||
i => i,
|
||||
})
|
||||
}
|
||||
|
||||
fn to_call<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
fn_declarations: &mut FxHashMap<
|
||||
Cow<'input, str>,
|
||||
(
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
SpirvWord,
|
||||
Vec<ast::Variable<SpirvWord>>,
|
||||
),
|
||||
>,
|
||||
name: Cow<'input, str>,
|
||||
i: ast::Instruction<SpirvWord>,
|
||||
) -> Result<ptx_parser::Instruction<SpirvWord>, TranslateError> {
|
||||
let mut data_return = Vec::new();
|
||||
let mut data_input = Vec::new();
|
||||
let mut arguments_return = Vec::new();
|
||||
let mut arguments_input = Vec::new();
|
||||
ast::visit(&i, &mut |name: &SpirvWord,
|
||||
type_space: Option<(
|
||||
&ptx_parser::Type,
|
||||
ptx_parser::StateSpace,
|
||||
)>,
|
||||
is_dst: bool,
|
||||
_: bool| {
|
||||
let (type_, space) = type_space.ok_or_else(error_mismatched_type)?;
|
||||
if is_dst {
|
||||
data_return.push((type_.clone(), space));
|
||||
arguments_return.push(*name);
|
||||
} else {
|
||||
data_input.push((type_.clone(), space));
|
||||
arguments_input.push(*name);
|
||||
};
|
||||
Ok::<_, TranslateError>(())
|
||||
})?;
|
||||
let fn_name = match fn_declarations.entry(name) {
|
||||
hash_map::Entry::Occupied(occupied_entry) => occupied_entry.get().1,
|
||||
hash_map::Entry::Vacant(vacant_entry) => {
|
||||
let name = vacant_entry.key().clone();
|
||||
let full_name = [ZLUDA_PTX_PREFIX, &*name].concat();
|
||||
let name = resolver.register_named(Cow::Owned(full_name.clone()), None);
|
||||
vacant_entry.insert((
|
||||
to_variables(resolver, &data_return),
|
||||
name,
|
||||
to_variables(resolver, &data_input),
|
||||
));
|
||||
name
|
||||
}
|
||||
};
|
||||
Ok(ast::Instruction::Call {
|
||||
data: ptx_parser::CallDetails {
|
||||
uniform: false,
|
||||
return_arguments: data_return,
|
||||
input_arguments: data_input,
|
||||
},
|
||||
arguments: ptx_parser::CallArgs {
|
||||
return_arguments: arguments_return,
|
||||
func: fn_name,
|
||||
input_arguments: arguments_input,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn to_variables<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
arguments: &Vec<(ptx_parser::Type, ptx_parser::StateSpace)>,
|
||||
) -> Vec<ptx_parser::Variable<SpirvWord>> {
|
||||
arguments
|
||||
.iter()
|
||||
.map(|(type_, space)| ast::Variable {
|
||||
align: None,
|
||||
v_type: type_.clone(),
|
||||
state_space: *space,
|
||||
name: resolver.register_unnamed(Some((type_.clone(), *space))),
|
||||
array_init: Vec::new(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
33
ptx/src/pass/replace_known_functions.rs
Normal file
33
ptx/src/pass/replace_known_functions.rs
Normal file
@ -0,0 +1,33 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use super::{GlobalStringIdentResolver2, NormalizedDirective2, SpirvWord};
|
||||
|
||||
pub(crate) fn run<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
mut directives: Vec<NormalizedDirective2>,
|
||||
) -> Vec<NormalizedDirective2> {
|
||||
for directive in directives.iter_mut() {
|
||||
match directive {
|
||||
NormalizedDirective2::Method(func) => {
|
||||
replace_with_ptx_impl(resolver, func.name);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
directives
|
||||
}
|
||||
|
||||
fn replace_with_ptx_impl<'input>(
|
||||
resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
fn_name: SpirvWord,
|
||||
) {
|
||||
let known_names = ["__assertfail"];
|
||||
if let Some(super::IdentEntry {
|
||||
name: Some(name), ..
|
||||
}) = resolver.ident_map.get_mut(&fn_name)
|
||||
{
|
||||
if known_names.contains(&&**name) {
|
||||
*name = Cow::Owned(format!("__zluda_ptx_impl_{}", name));
|
||||
}
|
||||
}
|
||||
}
|
69
ptx/src/pass/resolve_function_pointers.rs
Normal file
69
ptx/src/pass/resolve_function_pointers.rs
Normal file
@ -0,0 +1,69 @@
|
||||
use super::*;
|
||||
use ptx_parser as ast;
|
||||
use rustc_hash::FxHashSet;
|
||||
|
||||
pub(crate) fn run<'input>(
|
||||
directives: Vec<UnconditionalDirective>,
|
||||
) -> Result<Vec<UnconditionalDirective>, TranslateError> {
|
||||
let mut functions = FxHashSet::default();
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| run_directive(&mut functions, directive))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn run_directive<'input>(
|
||||
functions: &mut FxHashSet<SpirvWord>,
|
||||
directive: UnconditionalDirective,
|
||||
) -> Result<UnconditionalDirective, TranslateError> {
|
||||
Ok(match directive {
|
||||
var @ Directive2::Variable(..) => var,
|
||||
Directive2::Method(method) => {
|
||||
if !method.is_kernel {
|
||||
functions.insert(method.name);
|
||||
}
|
||||
Directive2::Method(run_method(functions, method)?)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn run_method<'input>(
|
||||
functions: &mut FxHashSet<SpirvWord>,
|
||||
method: UnconditionalFunction,
|
||||
) -> Result<UnconditionalFunction, TranslateError> {
|
||||
let body = method
|
||||
.body
|
||||
.map(|statements| {
|
||||
statements
|
||||
.into_iter()
|
||||
.map(|statement| run_statement(functions, statement))
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(Function2 { body, ..method })
|
||||
}
|
||||
|
||||
fn run_statement<'input>(
|
||||
functions: &mut FxHashSet<SpirvWord>,
|
||||
statement: UnconditionalStatement,
|
||||
) -> Result<UnconditionalStatement, TranslateError> {
|
||||
Ok(match statement {
|
||||
Statement::Instruction(ast::Instruction::Mov {
|
||||
data,
|
||||
arguments:
|
||||
ast::MovArgs {
|
||||
dst: ast::ParsedOperand::Reg(dst_reg),
|
||||
src: ast::ParsedOperand::Reg(src_reg),
|
||||
},
|
||||
}) if functions.contains(&src_reg) => {
|
||||
if data.typ != ast::Type::Scalar(ast::ScalarType::U64) {
|
||||
return Err(error_mismatched_type());
|
||||
}
|
||||
UnconditionalStatement::FunctionPointer(FunctionPointerDetails {
|
||||
dst: dst_reg,
|
||||
src: src_reg,
|
||||
})
|
||||
}
|
||||
s => s,
|
||||
})
|
||||
}
|
2004
ptx/src/ptx.lalrpop
2004
ptx/src/ptx.lalrpop
File diff suppressed because it is too large
Load Diff
24
ptx/src/test/ll/activemask.ll
Normal file
24
ptx/src/test/ll/activemask.ll
Normal file
@ -0,0 +1,24 @@
|
||||
declare i32 @__zluda_ptx_impl_activemask() #0
|
||||
|
||||
define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 {
|
||||
%"31" = alloca i64, align 8, addrspace(5)
|
||||
%"32" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"28"
|
||||
|
||||
"28": ; preds = %1
|
||||
%"33" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"33", ptr addrspace(5) %"31", align 4
|
||||
%"34" = call i32 @__zluda_ptx_impl_activemask()
|
||||
store i32 %"34", ptr addrspace(5) %"32", align 4
|
||||
%"35" = load i64, ptr addrspace(5) %"31", align 4
|
||||
%"36" = load i32, ptr addrspace(5) %"32", align 4
|
||||
%"37" = inttoptr i64 %"35" to ptr
|
||||
store i32 %"36", ptr %"37", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
30
ptx/src/test/ll/add.ll
Normal file
30
ptx/src/test/ll/add.ll
Normal file
@ -0,0 +1,30 @@
|
||||
define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = add i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
52
ptx/src/test/ll/add_ftz.ll
Normal file
52
ptx/src/test/ll/add_ftz.ll
Normal file
@ -0,0 +1,52 @@
|
||||
define amdgpu_kernel void @add_ftz(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
%"42" = alloca float, align 4, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"61" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load float, ptr %"61", align 4
|
||||
store float %"47", ptr addrspace(5) %"41", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"62" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"50" = load float, ptr %"33", align 4
|
||||
store float %"50", ptr addrspace(5) %"42", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"42", align 4
|
||||
%"51" = fadd float %"52", %"53"
|
||||
store float %"51", ptr addrspace(5) %"43", align 4
|
||||
call void @llvm.amdgcn.s.setreg(i32 6401, i32 3)
|
||||
%"55" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"56" = load float, ptr addrspace(5) %"42", align 4
|
||||
%"54" = fadd float %"55", %"56"
|
||||
store float %"54", ptr addrspace(5) %"44", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store float %"58", ptr %"63", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"64" = inttoptr i64 %"59" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"60" = load float, ptr addrspace(5) %"44", align 4
|
||||
store float %"60", ptr %"35", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind willreturn }
|
30
ptx/src/test/ll/add_non_coherent.ll
Normal file
30
ptx/src/test/ll/add_non_coherent.ll
Normal file
@ -0,0 +1,30 @@
|
||||
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr addrspace(1)
|
||||
%"40" = load i64, ptr addrspace(1) %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = add i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||
store i64 %"45", ptr addrspace(1) %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
30
ptx/src/test/ll/add_tuning.ll
Normal file
30
ptx/src/test/ll/add_tuning.ll
Normal file
@ -0,0 +1,30 @@
|
||||
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = add i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
36
ptx/src/test/ll/and.ll
Normal file
36
ptx/src/test/ll/and.ll
Normal file
@ -0,0 +1,36 @@
|
||||
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"50", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load i32, ptr %"31", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"52" = and i32 %"46", %"47"
|
||||
store i32 %"52", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"55" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"55", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
46
ptx/src/test/ll/atom_add.ll
Normal file
46
ptx/src/test/ll/atom_add.ll
Normal file
@ -0,0 +1,46 @@
|
||||
@shared_mem = external addrspace(3) global [1024 x i8], align 4
|
||||
|
||||
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"56" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i32, ptr %"56", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"47" = load i32, ptr %"32", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
store i32 %"48", ptr addrspace(3) @shared_mem, align 4
|
||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"50" syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %2, ptr addrspace(5) %"40", align 4
|
||||
%"51" = load i32, ptr addrspace(3) @shared_mem, align 4
|
||||
store i32 %"51", ptr addrspace(5) %"41", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"52" to ptr
|
||||
store i32 %"53", ptr %"61", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"62" = inttoptr i64 %"54" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"55" = load i32, ptr addrspace(5) %"41", align 4
|
||||
store i32 %"55", ptr %"34", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
46
ptx/src/test/ll/atom_add_float.ll
Normal file
46
ptx/src/test/ll/atom_add_float.ll
Normal file
@ -0,0 +1,46 @@
|
||||
@shared_mem = external addrspace(3) global [1024 x i8], align 4
|
||||
|
||||
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"56" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load float, ptr %"56", align 4
|
||||
store float %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"47" = load float, ptr %"32", align 4
|
||||
store float %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
store float %"48", ptr addrspace(3) @shared_mem, align 4
|
||||
%"50" = load float, ptr addrspace(5) %"41", align 4
|
||||
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"50" syncscope("agent-one-as") monotonic, align 4
|
||||
store float %2, ptr addrspace(5) %"40", align 4
|
||||
%"51" = load float, ptr addrspace(3) @shared_mem, align 4
|
||||
store float %"51", ptr addrspace(5) %"41", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"52" to ptr
|
||||
store float %"53", ptr %"61", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"62" = inttoptr i64 %"54" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"55" = load float, ptr addrspace(5) %"41", align 4
|
||||
store float %"55", ptr %"34", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
44
ptx/src/test/ll/atom_cas.ll
Normal file
44
ptx/src/test/ll/atom_cas.ll
Normal file
@ -0,0 +1,44 @@
|
||||
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"57" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i32, ptr %"57", align 4
|
||||
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"58" = inttoptr i64 %"48" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%2 = cmpxchg ptr %"31", i32 %"50", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
||||
%"59" = extractvalue { i32, i1 } %2, 0
|
||||
store i32 %"59", ptr addrspace(5) %"42", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"61", i64 4
|
||||
%"52" = load i32, ptr %"34", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"62" = inttoptr i64 %"53" to ptr
|
||||
store i32 %"54", ptr %"62", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"63" = inttoptr i64 %"55" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
store i32 %"56", ptr %"36", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
46
ptx/src/test/ll/atom_inc.ll
Normal file
46
ptx/src/test/ll/atom_inc.ll
Normal file
@ -0,0 +1,46 @@
|
||||
define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%2 = atomicrmw uinc_wrap ptr %"59", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %2, ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"60" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"60", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %3, ptr addrspace(5) %"43", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load i32, ptr %"61", align 4
|
||||
store i32 %"51", ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"62" = inttoptr i64 %"53" to ptr
|
||||
store i32 %"54", ptr %"62", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"63" = inttoptr i64 %"55" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
store i32 %"56", ptr %"34", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = inttoptr i64 %"57" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"64", i64 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
store i32 %"58", ptr %"36", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
30
ptx/src/test/ll/b64tof64.ll
Normal file
30
ptx/src/test/ll/b64tof64.ll
Normal file
@ -0,0 +1,30 @@
|
||||
define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca double, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"37" = load double, ptr addrspace(4) %"31", align 8
|
||||
store double %"37", ptr addrspace(5) %"33", align 8
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load double, ptr addrspace(5) %"33", align 8
|
||||
%"46" = bitcast double %"40" to i64
|
||||
store i64 %"46", ptr addrspace(5) %"34", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"47" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i64, ptr %"47", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"48" = inttoptr i64 %"43" to ptr
|
||||
store i64 %"44", ptr %"48", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
91
ptx/src/test/ll/bench.ll
Normal file
91
ptx/src/test/ll/bench.ll
Normal file
@ -0,0 +1,91 @@
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @bench(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #1 {
|
||||
%"57" = alloca i64, align 8, addrspace(5)
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
%"59" = alloca float, align 4, addrspace(5)
|
||||
%"60" = alloca float, align 4, addrspace(5)
|
||||
%"61" = alloca float, align 4, addrspace(5)
|
||||
%"62" = alloca float, align 4, addrspace(5)
|
||||
%"63" = alloca i32, align 4, addrspace(5)
|
||||
%"64" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"97"
|
||||
|
||||
"97": ; preds = %1
|
||||
%"65" = load i64, ptr addrspace(4) %"55", align 4
|
||||
store i64 %"65", ptr addrspace(5) %"57", align 4
|
||||
%"66" = load i64, ptr addrspace(4) %"56", align 4
|
||||
store i64 %"66", ptr addrspace(5) %"58", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"91" = inttoptr i64 %"68" to ptr
|
||||
%"67" = load float, ptr %"91", align 4
|
||||
store float %"67", ptr addrspace(5) %"59", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"92" = inttoptr i64 %"69" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"92", i64 4
|
||||
%"70" = load float, ptr %"39", align 4
|
||||
store float %"70", ptr addrspace(5) %"60", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"93" = inttoptr i64 %"71" to ptr
|
||||
%"41" = getelementptr inbounds i8, ptr %"93", i64 8
|
||||
%"72" = load float, ptr %"41", align 4
|
||||
store float %"72", ptr addrspace(5) %"61", align 4
|
||||
%"73" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"94" = inttoptr i64 %"73" to ptr
|
||||
%"43" = getelementptr inbounds i8, ptr %"94", i64 12
|
||||
%"74" = load float, ptr %"43", align 4
|
||||
store float %"74", ptr addrspace(5) %"62", align 4
|
||||
store i32 0, ptr addrspace(5) %"63", align 4
|
||||
br label %"10"
|
||||
|
||||
"10": ; preds = %"21", %"97"
|
||||
%"77" = load float, ptr addrspace(5) %"59", align 4
|
||||
%"78" = load float, ptr addrspace(5) %"60", align 4
|
||||
call void asm sideeffect "s_denorm_mode 0", "~{mode}"()
|
||||
%"76" = fmul float %"77", %"78"
|
||||
store float %"76", ptr addrspace(5) %"59", align 4
|
||||
%"80" = load float, ptr addrspace(5) %"61", align 4
|
||||
%"81" = load float, ptr addrspace(5) %"62", align 4
|
||||
call void asm sideeffect "s_denorm_mode 11", "~{mode}"()
|
||||
%"79" = fmul float %"80", %"81"
|
||||
store float %"79", ptr addrspace(5) %"61", align 4
|
||||
%"83" = load i32, ptr addrspace(5) %"63", align 4
|
||||
%"82" = add i32 %"83", 1
|
||||
store i32 %"82", ptr addrspace(5) %"63", align 4
|
||||
%"85" = load i32, ptr addrspace(5) %"63", align 4
|
||||
%"84" = icmp eq i32 %"85", 100000000
|
||||
store i1 %"84", ptr addrspace(5) %"64", align 1
|
||||
%"86" = load i1, ptr addrspace(5) %"64", align 1
|
||||
br i1 %"86", label %"11", label %"21"
|
||||
|
||||
"21": ; preds = %"10"
|
||||
br label %"10"
|
||||
|
||||
"11": ; preds = %"10"
|
||||
%"87" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"88" = load float, ptr addrspace(5) %"59", align 4
|
||||
%"95" = inttoptr i64 %"87" to ptr
|
||||
store float %"88", ptr %"95", align 4
|
||||
%"89" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"96" = inttoptr i64 %"89" to ptr
|
||||
%"48" = getelementptr inbounds i8, ptr %"96", i64 4
|
||||
%"90" = load float, ptr addrspace(5) %"61", align 4
|
||||
store float %"90", ptr %"48", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
46
ptx/src/test/ll/bfe.ll
Normal file
46
ptx/src/test/ll/bfe.ll
Normal file
@ -0,0 +1,46 @@
|
||||
declare i32 @__zluda_ptx_impl_bfe_u32(i32, i32, i32) #0
|
||||
|
||||
define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"45" = load i32, ptr %"57", align 4
|
||||
store i32 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"58" = inttoptr i64 %"47" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"48" = load i32, ptr %"32", align 4
|
||||
store i32 %"48", ptr addrspace(5) %"41", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"59" = inttoptr i64 %"49" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"59", i64 8
|
||||
%"50" = load i32, ptr %"34", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"51" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"52", i32 %"53", i32 %"54")
|
||||
store i32 %"51", ptr addrspace(5) %"40", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"60" = inttoptr i64 %"55" to ptr
|
||||
store i32 %"56", ptr %"60", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
53
ptx/src/test/ll/bfi.ll
Normal file
53
ptx/src/test/ll/bfi.ll
Normal file
@ -0,0 +1,53 @@
|
||||
declare i32 @__zluda_ptx_impl_bfi_b32(i32, i32, i32, i32) #0
|
||||
|
||||
define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"38"
|
||||
|
||||
"38": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load i32, ptr %"64", align 4
|
||||
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"65" = inttoptr i64 %"51" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"65", i64 4
|
||||
%"52" = load i32, ptr %"33", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"66" = inttoptr i64 %"53" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"66", i64 8
|
||||
%"54" = load i32, ptr %"35", align 4
|
||||
store i32 %"54", ptr addrspace(5) %"45", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"67" = inttoptr i64 %"55" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"67", i64 12
|
||||
%"56" = load i32, ptr %"37", align 4
|
||||
store i32 %"56", ptr addrspace(5) %"46", align 4
|
||||
%"58" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"59" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"61" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"68" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"58", i32 %"59", i32 %"60", i32 %"61")
|
||||
store i32 %"68", ptr addrspace(5) %"43", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"63" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"71" = inttoptr i64 %"62" to ptr
|
||||
store i32 %"63", ptr %"71", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
34
ptx/src/test/ll/block.ll
Normal file
34
ptx/src/test/ll/block.ll
Normal file
@ -0,0 +1,34 @@
|
||||
define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"42" = load i64, ptr %"51", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"44" = add i64 %"45", 1
|
||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"47" = add i64 %"48", 1
|
||||
store i64 %"47", ptr addrspace(5) %"46", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"52" = inttoptr i64 %"49" to ptr
|
||||
store i64 %"50", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
36
ptx/src/test/ll/bra.ll
Normal file
36
ptx/src/test/ll/bra.ll
Normal file
@ -0,0 +1,36 @@
|
||||
define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"50" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i64, ptr %"50", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
br label %"10"
|
||||
|
||||
"10": ; preds = %"35"
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"46" = add i64 %"47", 1
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
br label %"12"
|
||||
|
||||
"12": ; preds = %"10"
|
||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = inttoptr i64 %"48" to ptr
|
||||
store i64 %"49", ptr %"51", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
33
ptx/src/test/ll/brev.ll
Normal file
33
ptx/src/test/ll/brev.ll
Normal file
@ -0,0 +1,33 @@
|
||||
define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load i32, ptr %"43", align 4
|
||||
store i32 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call i32 @llvm.bitreverse.i32(i32 %"40")
|
||||
store i32 %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store i32 %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.bitreverse.i32(i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
64
ptx/src/test/ll/call.ll
Normal file
64
ptx/src/test/ll/call.ll
Normal file
@ -0,0 +1,64 @@
|
||||
define i64 @incr(i64 %"43") #0 {
|
||||
%"63" = alloca i64, align 8, addrspace(5)
|
||||
%"64" = alloca i64, align 8, addrspace(5)
|
||||
%"65" = alloca i64, align 8, addrspace(5)
|
||||
%"66" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"46"
|
||||
|
||||
"46": ; preds = %1
|
||||
store i64 %"43", ptr addrspace(5) %"65", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"65", align 4
|
||||
store i64 %"67", ptr addrspace(5) %"66", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"66", align 4
|
||||
%"68" = add i64 %"69", 1
|
||||
store i64 %"68", ptr addrspace(5) %"66", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"66", align 4
|
||||
store i64 %"70", ptr addrspace(5) %"64", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"64", align 4
|
||||
store i64 %"71", ptr addrspace(5) %"63", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"63", align 4
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #1 {
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"57" = alloca i64, align 8, addrspace(5)
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"44"
|
||||
|
||||
"44": ; preds = %1
|
||||
%"53" = load i64, ptr addrspace(4) %"48", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"50", align 4
|
||||
%"54" = load i64, ptr addrspace(4) %"49", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"51", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"72" = inttoptr i64 %"56" to ptr addrspace(1)
|
||||
%"55" = load i64, ptr addrspace(1) %"72", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"52", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"52", align 4
|
||||
store i64 %"59", ptr addrspace(5) %"57", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"41" = call i64 @incr(i64 %"40")
|
||||
br label %"45"
|
||||
|
||||
"45": ; preds = %"44"
|
||||
store i64 %"41", ptr addrspace(5) %"58", align 4
|
||||
%"60" = load i64, ptr addrspace(5) %"58", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"52", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"75" = inttoptr i64 %"61" to ptr addrspace(1)
|
||||
store i64 %"62", ptr addrspace(1) %"75", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
155
ptx/src/test/ll/call_rnd.ll
Normal file
155
ptx/src/test/ll/call_rnd.ll
Normal file
@ -0,0 +1,155 @@
|
||||
; @add_rm: returns the `fadd` of its two float arguments, after first calling
; @llvm.amdgcn.s.setreg(i32 6145, i32 2).
; NOTE(review): 6145 (0x1801) presumably encodes hwreg(MODE, offset 0, width 4)
; and the value 2 presumably selects round-toward-negative-infinity for f32,
; matching the "_rm" suffix - confirm against the AMDGPU ISA documentation.
; The register is not restored before returning.
define float @add_rm(float %"79", float %"80") #0 {
|
||||
%"128" = alloca float, align 4, addrspace(5)
|
||||
%"129" = alloca float, align 4, addrspace(5)
|
||||
%"130" = alloca float, align 4, addrspace(5)
|
||||
%"131" = alloca float, align 4, addrspace(5)
|
||||
%"132" = alloca float, align 4, addrspace(5)
|
||||
%"133" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"89"
|
||||
|
||||
"89": ; preds = %1
|
||||
; Write the hardware register in its own basic block, before any arithmetic.
call void @llvm.amdgcn.s.setreg(i32 6145, i32 2)
|
||||
br label %"87"
|
||||
|
||||
"87": ; preds = %"89"
|
||||
; Spill both arguments, then copy them into the working slots %"132"/%"133".
store float %"79", ptr addrspace(5) %"130", align 4
|
||||
store float %"80", ptr addrspace(5) %"131", align 4
|
||||
%"134" = load float, ptr addrspace(5) %"130", align 4
|
||||
store float %"134", ptr addrspace(5) %"132", align 4
|
||||
%"135" = load float, ptr addrspace(5) %"131", align 4
|
||||
store float %"135", ptr addrspace(5) %"133", align 4
|
||||
%"137" = load float, ptr addrspace(5) %"132", align 4
|
||||
%"138" = load float, ptr addrspace(5) %"133", align 4
|
||||
; The addition itself; the sum overwrites the first working slot.
%"136" = fadd float %"137", %"138"
|
||||
store float %"136", ptr addrspace(5) %"132", align 4
|
||||
; Copy the sum through %"129" into the return slot %"128".
%"139" = load float, ptr addrspace(5) %"132", align 4
|
||||
store float %"139", ptr addrspace(5) %"129", align 4
|
||||
%"140" = load float, ptr addrspace(5) %"129", align 4
|
||||
store float %"140", ptr addrspace(5) %"128", align 4
|
||||
%2 = load float, ptr addrspace(5) %"128", align 4
|
||||
ret float %2
|
||||
}
|
||||
|
||||
; @add_rp: returns the `fadd` of its two float arguments.
; Unlike @add_rm, this function contains no @llvm.amdgcn.s.setreg call.
; NOTE(review): presumably it relies on the mode already set by its caller
; (@call_rnd writes value 1 to the same register at kernel entry, which would
; match the "_rp" suffix if 1 means round-toward-positive-infinity) - confirm.
define float @add_rp(float %"82", float %"83") #0 {
|
||||
%"141" = alloca float, align 4, addrspace(5)
|
||||
%"142" = alloca float, align 4, addrspace(5)
|
||||
%"143" = alloca float, align 4, addrspace(5)
|
||||
%"144" = alloca float, align 4, addrspace(5)
|
||||
%"145" = alloca float, align 4, addrspace(5)
|
||||
%"146" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"88"
|
||||
|
||||
"88": ; preds = %1
|
||||
; Spill both arguments, then copy them into the working slots %"145"/%"146".
store float %"82", ptr addrspace(5) %"143", align 4
|
||||
store float %"83", ptr addrspace(5) %"144", align 4
|
||||
%"147" = load float, ptr addrspace(5) %"143", align 4
|
||||
store float %"147", ptr addrspace(5) %"145", align 4
|
||||
%"148" = load float, ptr addrspace(5) %"144", align 4
|
||||
store float %"148", ptr addrspace(5) %"146", align 4
|
||||
%"150" = load float, ptr addrspace(5) %"145", align 4
|
||||
%"151" = load float, ptr addrspace(5) %"146", align 4
|
||||
; The addition itself; the sum overwrites the first working slot.
%"149" = fadd float %"150", %"151"
|
||||
store float %"149", ptr addrspace(5) %"145", align 4
|
||||
; Copy the sum through %"142" into the return slot %"141".
%"152" = load float, ptr addrspace(5) %"145", align 4
|
||||
store float %"152", ptr addrspace(5) %"142", align 4
|
||||
%"153" = load float, ptr addrspace(5) %"142", align 4
|
||||
store float %"153", ptr addrspace(5) %"141", align 4
|
||||
%2 = load float, ptr addrspace(5) %"141", align 4
|
||||
ret float %2
|
||||
}
|
||||
|
||||
; Kernel @call_rnd: reads its two i64 arguments (passed byref in addrspace(4))
; and uses them as input and output addresses (converted to plain `ptr`).
; It loads four floats from the input at byte offsets 0, 4, 8 and 12, then:
;   - calls @add_rp on the floats at offsets 0 and 4 and stores the result at
;     output offset 0;
;   - calls @add_rm on the floats at offsets 8 and 12 and stores the result at
;     output offset 4.
; The kernel starts by calling @llvm.amdgcn.s.setreg(i32 6145, i32 1).
; NOTE(review): 6145 presumably encodes hwreg(MODE, offset 0, width 4) and the
; value 1 presumably selects round-toward-positive-infinity for f32 - confirm
; against the AMDGPU ISA documentation.
define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrspace(4) byref(i64) %"93") #1 {
|
||||
%"94" = alloca i64, align 8, addrspace(5)
|
||||
%"95" = alloca i64, align 8, addrspace(5)
|
||||
%"96" = alloca float, align 4, addrspace(5)
|
||||
%"97" = alloca float, align 4, addrspace(5)
|
||||
%"98" = alloca float, align 4, addrspace(5)
|
||||
%"99" = alloca float, align 4, addrspace(5)
|
||||
%"100" = alloca float, align 4, addrspace(5)
|
||||
%"101" = alloca float, align 4, addrspace(5)
|
||||
%"102" = alloca float, align 4, addrspace(5)
|
||||
%"103" = alloca float, align 4, addrspace(5)
|
||||
%"104" = alloca float, align 4, addrspace(5)
|
||||
%"105" = alloca float, align 4, addrspace(5)
|
||||
%"106" = alloca float, align 4, addrspace(5)
|
||||
%"107" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"84"
|
||||
|
||||
"84": ; preds = %1
|
||||
; Register write performed before any other work in the kernel body.
call void @llvm.amdgcn.s.setreg(i32 6145, i32 1)
|
||||
; Copy both kernel arguments into local slots %"94" (input) and %"95" (output).
%"108" = load i64, ptr addrspace(4) %"92", align 4
|
||||
store i64 %"108", ptr addrspace(5) %"94", align 4
|
||||
%"109" = load i64, ptr addrspace(4) %"93", align 4
|
||||
store i64 %"109", ptr addrspace(5) %"95", align 4
|
||||
; Input float at byte offset 0 -> %"96".
%"111" = load i64, ptr addrspace(5) %"94", align 4
|
||||
%"154" = inttoptr i64 %"111" to ptr
|
||||
%"110" = load float, ptr %"154", align 4
|
||||
store float %"110", ptr addrspace(5) %"96", align 4
|
||||
; Input float at byte offset 4 -> %"97".
%"112" = load i64, ptr addrspace(5) %"94", align 4
|
||||
%"155" = inttoptr i64 %"112" to ptr
|
||||
%"59" = getelementptr inbounds i8, ptr %"155", i64 4
|
||||
%"113" = load float, ptr %"59", align 4
|
||||
store float %"113", ptr addrspace(5) %"97", align 4
|
||||
; Input float at byte offset 8 -> %"98".
%"114" = load i64, ptr addrspace(5) %"94", align 4
|
||||
%"156" = inttoptr i64 %"114" to ptr
|
||||
%"61" = getelementptr inbounds i8, ptr %"156", i64 8
|
||||
%"115" = load float, ptr %"61", align 4
|
||||
store float %"115", ptr addrspace(5) %"98", align 4
|
||||
; Input float at byte offset 12 -> %"99".
%"116" = load i64, ptr addrspace(5) %"94", align 4
|
||||
%"157" = inttoptr i64 %"116" to ptr
|
||||
%"63" = getelementptr inbounds i8, ptr %"157", i64 12
|
||||
%"117" = load float, ptr %"63", align 4
|
||||
store float %"117", ptr addrspace(5) %"99", align 4
|
||||
; Stage the first pair (%"96", %"97") as call arguments and call @add_rp.
%"118" = load float, ptr addrspace(5) %"96", align 4
|
||||
store float %"118", ptr addrspace(5) %"102", align 4
|
||||
%"119" = load float, ptr addrspace(5) %"97", align 4
|
||||
store float %"119", ptr addrspace(5) %"103", align 4
|
||||
%"72" = load float, ptr addrspace(5) %"102", align 4
|
||||
%"73" = load float, ptr addrspace(5) %"103", align 4
|
||||
%"74" = call float @add_rp(float %"72", float %"73")
|
||||
br label %"85"
|
||||
|
||||
"85": ; preds = %"84"
|
||||
; Store the @add_rp result at output byte offset 0.
store float %"74", ptr addrspace(5) %"104", align 4
|
||||
%"120" = load float, ptr addrspace(5) %"104", align 4
|
||||
store float %"120", ptr addrspace(5) %"100", align 4
|
||||
%"121" = load i64, ptr addrspace(5) %"95", align 4
|
||||
%"122" = load float, ptr addrspace(5) %"100", align 4
|
||||
%"158" = inttoptr i64 %"121" to ptr
|
||||
store float %"122", ptr %"158", align 4
|
||||
; Stage the second pair (%"98", %"99") as call arguments and call @add_rm.
%"123" = load float, ptr addrspace(5) %"98", align 4
|
||||
store float %"123", ptr addrspace(5) %"105", align 4
|
||||
%"124" = load float, ptr addrspace(5) %"99", align 4
|
||||
store float %"124", ptr addrspace(5) %"106", align 4
|
||||
%"75" = load float, ptr addrspace(5) %"105", align 4
|
||||
%"76" = load float, ptr addrspace(5) %"106", align 4
|
||||
; @add_rm issues its own s.setreg with value 2; this kernel does not write
; the register again afterwards, and only loads/stores follow the call.
%"77" = call float @add_rm(float %"75", float %"76")
|
||||
br label %"86"
|
||||
|
||||
"86": ; preds = %"85"
|
||||
; Store the @add_rm result at output byte offset 4.
store float %"77", ptr addrspace(5) %"107", align 4
|
||||
%"125" = load float, ptr addrspace(5) %"107", align 4
|
||||
store float %"125", ptr addrspace(5) %"101", align 4
|
||||
%"126" = load i64, ptr addrspace(5) %"95", align 4
|
||||
%"159" = inttoptr i64 %"126" to ptr
|
||||
%"65" = getelementptr inbounds i8, ptr %"159", i64 4
|
||||
%"127" = load float, ptr addrspace(5) %"101", align 4
|
||||
store float %"127", ptr %"65", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Declaration of the intrinsic called by @add_rm and @call_rnd. The first
; operand is `immarg`, so it must be a compile-time constant at every call site.
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2
|
||||
|
||||
; Attribute group #0 is attached to the helper functions @add_rm and @add_rp;
; both denormal modes are "dynamic".
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
; Attribute group #1 is attached to the @call_rnd kernel; it uses
; "preserve-sign" for "denormal-fp-math" and "ieee" for "denormal-fp-math-f32".
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
; Attribute group #2 is attached to the @llvm.amdgcn.s.setreg declaration.
attributes #2 = { nocallback nofree nosync nounwind willreturn }
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user