From ef0c4afcf903ec910aa579c4c251b9467bf00273 Mon Sep 17 00:00:00 2001 From: Andrzej Janik Date: Thu, 3 Jul 2025 16:07:00 -0700 Subject: [PATCH] Run unit tests on every commit (#401) --- .github/workflows/move_tests.sh | 12 ++++ .github/workflows/pr_master.yml | 91 +++++++++++++++++++++----- .github/workflows/push_master.yml | 92 ++++++++++++++++++++++----- .github/workflows/rocm_setup_build.sh | 17 +++++ .github/workflows/rocm_setup_run.sh | 26 ++++++++ llvm_zluda/build.rs | 16 +++++ ptx/src/test/spirv_run/mod.rs | 16 ++--- zluda/src/impl/module.rs | 4 +- zluda_inject/tests/inject.rs | 1 + 9 files changed, 235 insertions(+), 40 deletions(-) create mode 100644 .github/workflows/move_tests.sh create mode 100644 .github/workflows/rocm_setup_build.sh create mode 100644 .github/workflows/rocm_setup_run.sh diff --git a/.github/workflows/move_tests.sh b/.github/workflows/move_tests.sh new file mode 100644 index 0000000..4f908ba --- /dev/null +++ b/.github/workflows/move_tests.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -ex +TEST_EXECUTABLES_DIR=$1 +SUFFIX=$2 + +ls ${TEST_EXECUTABLES_DIR}/* | sort -u | while read -r executable; do + output=$("$executable" --list 2>/dev/null) + exit_code=$? + if [ $exit_code -eq 0 ] && echo "$output" | grep -q "_${SUFFIX}: test$"; then + mv "$executable" "${TEST_EXECUTABLES_DIR}/../${SUFFIX}/" + fi +done diff --git a/.github/workflows/pr_master.yml b/.github/workflows/pr_master.yml index f0fd783..3218578 100644 --- a/.github/workflows/pr_master.yml +++ b/.github/workflows/pr_master.yml @@ -5,6 +5,7 @@ on: env: CARGO_TERM_COLOR: always + SCCACHE_GHA_ENABLED: "true" CARGO_PROFILE: release ROCM_VERSION: "6.3.1" @@ -14,23 +15,21 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: jlumbroso/free-disk-space@main - - name: Install ROCm - run: | - sudo mkdir --parents --mode=0755 /etc/apt/keyrings - sudo sh -c 'wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null' - sudo sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }} jammy main > /etc/apt/sources.list.d/rocm.list' - sudo sh -c 'echo Package: * > /etc/apt/preferences.d/rocm-pin-600' - sudo sh -c 'echo Pin: release o=repo.radeon.com >> /etc/apt/preferences.d/rocm-pin-600' - sudo sh -c 'echo Pin-Priority: 600 >> /etc/apt/preferences.d/rocm-pin-600' - sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib rocm-llvm-dev hip-runtime-amd hip-dev - echo 'export PATH="$PATH:/opt/rocm/bin"' | sudo tee /etc/profile.d/rocm.sh - echo '/opt/rocm/lib' | sudo tee /etc/ld.so.conf.d/rocm.conf - sudo ldconfig + with: + # Removing Android stuff should be enough + android: true + dotnet: false + haskell: false + large-packages: false + docker-images: false + swap-storage: false - uses: actions/checkout@v4 with: submodules: true - - uses: Swatinem/rust-cache@v2 + - name: Install ROCm + run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }} + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 - name: Build # https://github.com/actions/upload-artifact/issues/39 run: | @@ -51,7 +50,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: true - - uses: Swatinem/rust-cache@v2 + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 - name: Build run: | cargo xtask zip --profile ${{ env.CARGO_PROFILE }} @@ -63,3 +63,64 @@ jobs: with: name: zluda-windows-${{ env.SHORT_SHA }} path: target/${{ env.CARGO_PROFILE }}/zluda + build_tests: + name: Build AMD GPU unit tests + runs-on: ubuntu-22.04 + outputs: + test_package: ${{ steps.upload_artifacts.outputs.artifact-id }} + steps: + - uses: jlumbroso/free-disk-space@main + with: + # Removing Android stuff should be enough + android: true + dotnet: false + haskell: false + large-packages: false + docker-images: false + swap-storage: false + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install ROCm + run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }} + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 + - uses: taiki-e/install-action@v2 + with: + tool: cargo-export + - name: Build + run: | + cargo export target/tests -- test --features ci_build --workspace --exclude cuda_base --exclude ptx_parser_macros + mkdir -p target/amdgpu + bash .github/workflows/move_tests.sh target/tests amdgpu + strip target/amdgpu/* + - name: Upload + id: upload_artifacts + uses: actions/upload-artifact@v4 + with: + name: tests + path: target/amdgpu + retention-days: 7 + run_tests: + name: Run AMD GPU unit tests + runs-on: gpu_small + needs: [build_tests] + steps: + - uses: actions/checkout@v4 + with: + submodules: false + sparse-checkout: .github + - name: Install ROCm + run: sudo bash .github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} + - uses: actions/download-artifact@v4 + with: + artifact-ids: ${{ needs.build_tests.outputs.test_package }} + path: target + - name: Run tests + run: | + chmod +x target/tests/* + error_occurred=0 + for exe in target/tests/*; do + ./"$exe" _amdgpu || { error_occurred=1; true; } + done + exit $error_occurred diff --git a/.github/workflows/push_master.yml b/.github/workflows/push_master.yml index 74afd47..ae736cc 100644 --- a/.github/workflows/push_master.yml +++ b/.github/workflows/push_master.yml @@ -1,10 +1,12 @@ name: ZLUDA on: + workflow_dispatch: push: branches: [ master ] env: CARGO_TERM_COLOR: always + SCCACHE_GHA_ENABLED: "true" CARGO_PROFILE: release-lto ROCM_VERSION: "6.3.1" @@ -14,23 +16,21 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: jlumbroso/free-disk-space@main - - name: Install ROCm - run: | - sudo mkdir --parents --mode=0755 /etc/apt/keyrings - sudo sh -c 'wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null' - sudo sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }} jammy main > /etc/apt/sources.list.d/rocm.list' - sudo sh -c 'echo Package: * > /etc/apt/preferences.d/rocm-pin-600' - sudo sh -c 'echo Pin: release o=repo.radeon.com >> /etc/apt/preferences.d/rocm-pin-600' - sudo sh -c 'echo Pin-Priority: 600 >> /etc/apt/preferences.d/rocm-pin-600' - sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib rocm-llvm-dev hip-runtime-amd hip-dev - echo 'export PATH="$PATH:/opt/rocm/bin"' | sudo tee /etc/profile.d/rocm.sh - echo '/opt/rocm/lib' | sudo tee /etc/ld.so.conf.d/rocm.conf - sudo ldconfig + with: + # Removing Android stuff should be enough + android: true + dotnet: false + haskell: false + large-packages: false + docker-images: false + swap-storage: false - uses: actions/checkout@v4 with: submodules: true - - uses: Swatinem/rust-cache@v2 + - name: Install ROCm + run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }} + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 - name: Build # https://github.com/actions/upload-artifact/issues/39 run: | @@ -51,7 +51,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: true - - uses: Swatinem/rust-cache@v2 + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 - name: Build run: | cargo xtask zip --profile ${{ env.CARGO_PROFILE }} @@ -63,3 +64,64 @@ jobs: with: name: zluda-windows-${{ env.SHORT_SHA }} path: target/${{ env.CARGO_PROFILE }}/zluda + build_tests: + name: Build AMD GPU unit tests + runs-on: ubuntu-22.04 + outputs: + test_package: ${{ steps.upload_artifacts.outputs.artifact-id }} + steps: + - uses: jlumbroso/free-disk-space@main + with: + # Removing Android stuff should be enough + android: true + dotnet: false + haskell: false + large-packages: false + docker-images: false + swap-storage: false + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install ROCm + run: sudo bash .github/workflows/rocm_setup_build.sh ${{ env.ROCM_VERSION }} + - name: Run sccache-cache + uses: mozilla-actions/sccache-action@v0.0.9 + - uses: taiki-e/install-action@v2 + with: + tool: cargo-export + - name: Build + run: | + cargo export target/tests -- test --features ci_build --workspace --exclude cuda_base --exclude ptx_parser_macros + mkdir -p target/amdgpu + bash .github/workflows/move_tests.sh target/tests amdgpu + strip target/amdgpu/* + - name: Upload + id: upload_artifacts + uses: actions/upload-artifact@v4 + with: + name: tests + path: target/amdgpu + retention-days: 7 + run_tests: + name: Run AMD GPU unit tests + runs-on: gpu_small + needs: [build_tests] + steps: + - uses: actions/checkout@v4 + with: + submodules: false + sparse-checkout: .github + - name: Install ROCm + run: sudo bash .github/workflows/rocm_setup_run.sh ${{ env.ROCM_VERSION }} + - uses: actions/download-artifact@v4 + with: + artifact-ids: ${{ needs.build_tests.outputs.test_package }} + path: target + - name: Run tests + run: | + chmod +x target/tests/* + error_occurred=0 + for exe in target/tests/*; do + ./"$exe" _amdgpu || { error_occurred=1; true; } + done + exit $error_occurred diff --git a/.github/workflows/rocm_setup_build.sh b/.github/workflows/rocm_setup_build.sh new file mode 100644 index 0000000..a169319 --- /dev/null +++ b/.github/workflows/rocm_setup_build.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -ex +ROCM_VERSION=$1 + +DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg +# Source: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager/package-manager-ubuntu.html +mkdir --parents --mode=0755 /etc/apt/keyrings +wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \ + gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null +echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION jammy main | tee /etc/apt/sources.list.d/rocm.list +echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ + | tee /etc/apt/preferences.d/rocm-pin-600 +DEBIAN_FRONTEND=noninteractive apt update -y +DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends rocm-smi-lib rocm-llvm-dev hip-runtime-amd hip-dev +echo 'export PATH="$PATH:/opt/rocm/bin"' | tee /etc/profile.d/rocm.sh +echo "/opt/rocm/lib" | tee /etc/ld.so.conf.d/rocm.conf +ldconfig \ No newline at end of file diff --git a/.github/workflows/rocm_setup_run.sh b/.github/workflows/rocm_setup_run.sh new file mode 100644 index 0000000..5942e38 --- /dev/null +++ b/.github/workflows/rocm_setup_run.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -ex +ROCM_VERSION=$1 + +DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends gpg zstd unzip "linux-headers-$(uname -r)" "linux-modules-extra-$(uname -r)" +# Source: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/install-methods/package-manager/package-manager-ubuntu.html +mkdir --parents --mode=0755 /etc/apt/keyrings +wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \ + gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null +echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION noble main | tee /etc/apt/sources.list.d/rocm.list +echo deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/$ROCM_VERSION/ubuntu noble main | tee /etc/apt/sources.list.d/amdgpu.list +echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ + | tee /etc/apt/preferences.d/rocm-pin-600 +DEBIAN_FRONTEND=noninteractive apt update -y +DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends amdgpu-dkms hip-runtime-amd +echo 'export PATH="$PATH:/opt/rocm/bin"' | tee /etc/profile.d/rocm.sh +echo "/opt/rocm/lib" | tee /etc/ld.so.conf.d/rocm.conf +ldconfig + +#Grant access to GPUs to all users via udev rules +cat <<'EOF' > /etc/udev/rules.d/70-amdgpu.rules +KERNEL=="kfd", MODE="0666" +SUBSYSTEM=="drm", KERNEL=="renderD*", MODE="0666" +EOF +udevadm control --reload-rules && udevadm trigger +modprobe amdgpu \ No newline at end of file diff --git a/llvm_zluda/build.rs b/llvm_zluda/build.rs index ab66e1b..722dc42 100644 --- a/llvm_zluda/build.rs +++ b/llvm_zluda/build.rs @@ -14,6 +14,7 @@ const COMPONENTS: &[&'static str] = &[ fn main() { let mut cmake = Config::new(r"../ext/llvm-project/llvm"); + try_use_sccache(&mut cmake); try_use_ninja(&mut cmake); cmake // It's not like we can do anything about the warnings @@ -52,6 +53,21 @@ fn main() { compile_cxx_lib(cxxflags); } +// https://github.com/mozilla/sccache/blob/main/README.md#usage +fn try_use_sccache(cmake: &mut Config) { + if let Ok(sccache) = std::env::var("SCCACHE_PATH") { + cmake.define("CMAKE_CXX_COMPILER_LAUNCHER", &*sccache); + cmake.define("CMAKE_C_COMPILER_LAUNCHER", &*sccache); + match std::env::var_os("CARGO_CFG_TARGET_OS") { + Some(os) if os == "windows" => { + cmake.define("CMAKE_MSVC_DEBUG_INFORMATION_FORMAT", "Embedded"); + cmake.define("CMAKE_POLICY_CMP0141", "NEW"); + } + _ => {} + } + } +} + fn try_use_ninja(cmake: &mut Config) { let mut cmd = Command::new("ninja"); cmd.arg("--version"); diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs index c594ebb..a2f4a11 100644 --- a/ptx/src/test/spirv_run/mod.rs +++ b/ptx/src/test/spirv_run/mod.rs @@ -53,7 +53,7 @@ macro_rules! test_ptx { ($fn_name:ident, $input:expr, $output:expr) => { paste::item! { #[test] - fn [<$fn_name _hip>]() -> Result<(), Box> { + fn [<$fn_name _amdgpu>]() -> Result<(), Box> { let ptx = read_test_file!(concat!(stringify!($fn_name), ".ptx")); let input = $input; let output = $output; @@ -83,7 +83,7 @@ macro_rules! test_ptx_warp { ($fn_name:ident, $output:expr) => { paste::item! { #[test] - fn [<$fn_name _hip>]() -> Result<(), Box> { + fn [<$fn_name _amdgpu>]() -> Result<(), Box> { let ptx = read_test_file!(concat!(stringify!($fn_name), ".ptx")); let mut output = $output; test_hip_assert(stringify!($fn_name), &ptx, None::<&[u8]>, &mut output, 64) @@ -273,15 +273,15 @@ test_ptx!(activemask, [0u32], [1u32]); test_ptx!(membar, [152731u32], [152731u32]); test_ptx!(shared_unify_extern, [7681u64, 7682u64], [15363u64]); test_ptx!(shared_unify_local, [16752u64, 714u64], [17466u64]); -// This test currently fails for reasons outside of ZLUDA's control. +// FIXME: This test currently fails for reasons outside of ZLUDA's control. // One of the LLVM passes does not understand that setreg instruction changes // global floating point state and assumes that both floating point // additions are the exact same expressions and optimizes second addition away. -test_ptx!( - add_ftz, - [f32::from_bits(0x800000), f32::from_bits(0x007FFFFF)], - [0x800000u32, 0xFFFFFF] -); +// test_ptx!( +// add_ftz, +// [f32::from_bits(0x800000), f32::from_bits(0x007FFFFF)], +// [0x800000u32, 0xFFFFFF] +// ); test_ptx!(add_s32_sat, [i32::MIN, -1], [i32::MIN, i32::MAX]); test_ptx!(malformed_label, [2u64], [3u64]); test_ptx!( diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs index f7b9f22..8274c75 100644 --- a/zluda/src/impl/module.rs +++ b/zluda/src/impl/module.rs @@ -26,7 +26,7 @@ fn get_ptx_from_wrapped_fatbin(image: *const ::core::ffi::c_void) -> Result Result { let ptx = if unsafe { *(image as *const u32) } == FatbincWrapper::MAGIC { let ptx_bytes = get_ptx_from_wrapped_fatbin(image)?; - str::from_utf8(&ptx_bytes) + std::str::from_utf8(&ptx_bytes) .map_err(|_| CUerror::UNKNOWN)? .to_owned() } else { diff --git a/zluda_inject/tests/inject.rs b/zluda_inject/tests/inject.rs index 3e6ae97..f897f9c 100644 --- a/zluda_inject/tests/inject.rs +++ b/zluda_inject/tests/inject.rs @@ -1,3 +1,4 @@ +#![cfg(windows)] use std::{env, io, path::PathBuf, process::Command}; #[test]