From 119b635b9dffccc2de699b188897d8077529b0d6 Mon Sep 17 00:00:00 2001 From: Violet Date: Wed, 23 Jul 2025 14:55:52 -0700 Subject: [PATCH] Emit correct alignment for loads and stores (#429) --- ptx/src/pass/llvm/emit.rs | 19 +++++-- ptx/src/test/ll/activemask.ll | 6 +-- ptx/src/test/ll/add.ll | 26 ++++----- ptx/src/test/ll/add_non_coherent.ll | 26 ++++----- ptx/src/test/ll/add_s32_sat.ll | 16 +++--- ptx/src/test/ll/add_tuning.ll | 26 ++++----- ptx/src/test/ll/and.ll | 14 ++--- ptx/src/test/ll/assertfail.ll | 48 ++++++++--------- ptx/src/test/ll/atom_add.ll | 16 +++--- ptx/src/test/ll/atom_add_float.ll | 16 +++--- ptx/src/test/ll/atom_cas.ll | 18 +++---- ptx/src/test/ll/atom_inc.ll | 20 +++---- ptx/src/test/ll/b64tof64.ll | 20 +++---- ptx/src/test/ll/bar_red_and_pred.ll | 18 +++---- ptx/src/test/ll/bfe.ll | 16 +++--- ptx/src/test/ll/bfi.ll | 18 +++---- ptx/src/test/ll/block.ll | 30 +++++------ ptx/src/test/ll/bra.ll | 26 ++++----- ptx/src/test/ll/brev.ll | 12 ++--- ptx/src/test/ll/call.ll | 54 +++++++++---------- ptx/src/test/ll/call_rnd.ll | 20 +++---- ptx/src/test/ll/clz.ll | 12 ++--- ptx/src/test/ll/const.ll | 18 +++---- ptx/src/test/ll/constant_f32.ll | 12 ++--- ptx/src/test/ll/constant_negative.ll | 12 ++--- ptx/src/test/ll/cos.ll | 12 ++--- ptx/src/test/ll/cvt_f64_f32.ll | 14 ++--- ptx/src/test/ll/cvt_rni.ll | 18 +++---- ptx/src/test/ll/cvt_rni_u16_f32.ll | 12 ++--- ptx/src/test/ll/cvt_rzi.ll | 18 +++---- ptx/src/test/ll/cvt_s16_s8.ll | 12 ++--- ptx/src/test/ll/cvt_s32_f32.ll | 16 +++--- ptx/src/test/ll/cvt_s64_s32.ll | 18 +++---- ptx/src/test/ll/cvt_sat_s_u.ll | 14 ++--- ptx/src/test/ll/cvta.ll | 16 +++--- ptx/src/test/ll/div_approx.ll | 14 ++--- ptx/src/test/ll/ex2.ll | 12 ++--- ptx/src/test/ll/extern_func.ll | 30 +++++------ ptx/src/test/ll/extern_shared.ll | 28 +++++----- ptx/src/test/ll/extern_shared_call.ll | 40 +++++++------- ptx/src/test/ll/fma.ll | 16 +++--- ptx/src/test/ll/fmax.ll | 14 ++--- ptx/src/test/ll/global_array.ll | 10 ++-- ptx/src/test/ll/lanemask_lt.ll | 12 ++--- ptx/src/test/ll/ld_st.ll | 20 +++---- ptx/src/test/ll/ld_st_implicit.ll | 18 +++---- ptx/src/test/ll/ld_st_offset.ll | 16 +++--- ptx/src/test/ll/lg2.ll | 12 ++--- ptx/src/test/ll/local_align.ll | 20 +++---- ptx/src/test/ll/mad_s32.ll | 16 +++--- ptx/src/test/ll/mad_wide.ll | 28 +++++----- ptx/src/test/ll/malformed_label.ll | 24 ++++----- ptx/src/test/ll/max.ll | 14 ++--- ptx/src/test/ll/membar.ll | 12 ++--- ptx/src/test/ll/min.ll | 14 ++--- ptx/src/test/ll/mov.ll | 24 ++++----- ptx/src/test/ll/mov_address.ll | 2 +- ptx/src/test/ll/mul24_hi_s32.ll | 12 ++--- ptx/src/test/ll/mul24_hi_u32.ll | 12 ++--- ptx/src/test/ll/mul24_lo_s32.ll | 12 ++--- ptx/src/test/ll/mul24_lo_u32.ll | 12 ++--- ptx/src/test/ll/mul_ftz.ll | 14 ++--- ptx/src/test/ll/mul_hi.ll | 24 ++++----- ptx/src/test/ll/mul_lo.ll | 24 ++++----- ptx/src/test/ll/mul_non_ftz.ll | 14 ++--- ptx/src/test/ll/mul_wide.ll | 20 +++---- ptx/src/test/ll/multiple_return.ll | 14 ++--- ptx/src/test/ll/neg.ll | 12 ++--- ptx/src/test/ll/non_scalar_ptr_offset.ll | 12 ++--- ptx/src/test/ll/not.ll | 24 ++++----- ptx/src/test/ll/ntid.ll | 12 ++--- ptx/src/test/ll/or.ll | 32 +++++------ ptx/src/test/ll/popc.ll | 12 ++--- ptx/src/test/ll/pred_not.ll | 34 ++++++------ ptx/src/test/ll/prmt.ll | 14 ++--- ptx/src/test/ll/rcp.ll | 12 ++--- ptx/src/test/ll/reg_local.ll | 28 +++++----- ptx/src/test/ll/rem.ll | 14 ++--- ptx/src/test/ll/rsqrt.ll | 12 ++--- ptx/src/test/ll/selp.ll | 14 ++--- ptx/src/test/ll/selp_true.ll | 14 ++--- ptx/src/test/ll/setp.ll | 34 ++++++------ ptx/src/test/ll/setp_gt.ll | 14 ++--- ptx/src/test/ll/setp_leu.ll | 14 ++--- ptx/src/test/ll/setp_nan.ll | 32 +++++------ ptx/src/test/ll/setp_num.ll | 32 +++++------ ptx/src/test/ll/shared_ptr_32.ll | 28 +++++----- ptx/src/test/ll/shared_ptr_take_address.ll | 34 ++++++------ ptx/src/test/ll/shared_unify_extern.ll | 56 ++++++++++---------- ptx/src/test/ll/shared_unify_local.ll | 52 +++++++++--------- ptx/src/test/ll/shared_variable.ll | 28 +++++----- ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll | 18 +++---- ptx/src/test/ll/shfl_sync_down_b32_pred.ll | 18 +++---- ptx/src/test/ll/shfl_sync_idx_b32_pred.ll | 18 +++---- ptx/src/test/ll/shfl_sync_mode_b32.ll | 18 +++---- ptx/src/test/ll/shfl_sync_up_b32_pred.ll | 18 +++---- ptx/src/test/ll/shl.ll | 24 ++++----- ptx/src/test/ll/shr.ll | 12 ++--- ptx/src/test/ll/sign_extend.ll | 12 ++--- ptx/src/test/ll/sin.ll | 12 ++--- ptx/src/test/ll/sqrt.ll | 12 ++--- ptx/src/test/ll/stateful_ld_st_ntid.ll | 38 ++++++------- ptx/src/test/ll/stateful_ld_st_ntid_chain.ll | 38 ++++++------- ptx/src/test/ll/stateful_ld_st_ntid_sub.ll | 38 ++++++------- ptx/src/test/ll/stateful_ld_st_simple.ll | 24 ++++----- ptx/src/test/ll/stateful_neg_offset.ll | 36 ++++++------- ptx/src/test/ll/sub.ll | 24 ++++----- ptx/src/test/ll/tid.ll | 14 ++--- ptx/src/test/ll/vector.ll | 14 ++--- ptx/src/test/ll/vector4.ll | 12 ++--- ptx/src/test/ll/vector_extract.ll | 12 ++--- ptx/src/test/ll/warp_sz.ll | 6 +-- ptx/src/test/ll/xor.ll | 14 ++--- 113 files changed, 1117 insertions(+), 1108 deletions(-) diff --git a/ptx/src/pass/llvm/emit.rs b/ptx/src/pass/llvm/emit.rs index e9226dc..eb1fbd4 100644 --- a/ptx/src/pass/llvm/emit.rs +++ b/ptx/src/pass/llvm/emit.rs @@ -532,8 +532,10 @@ impl<'a> MethodEmitContext<'a> { let builder = self.builder; let type_ = get_type(self.context, &data.typ)?; let ptr = self.resolver.value(arguments.src)?; - self.resolver.with_result(arguments.dst, |dst| unsafe { - LLVMBuildLoad2(builder, type_, ptr, dst) + self.resolver.with_result(arguments.dst, |dst| { + let load = unsafe { LLVMBuildLoad2(builder, type_, ptr, dst) }; + unsafe { LLVMSetAlignment(load, data.typ.layout().align() as u32) }; + load }); Ok(()) } @@ -739,7 +741,8 @@ impl<'a> MethodEmitContext<'a> { if data.qualifier != ast::LdStQualifier::Weak { todo!() } - unsafe { LLVMBuildStore(self.builder, value, ptr) }; + let store = unsafe { LLVMBuildStore(self.builder, value, ptr) }; + unsafe { LLVMSetAlignment(store, data.typ.layout().align() as u32); } Ok(()) } @@ -953,8 +956,14 @@ impl<'a> MethodEmitContext<'a> { .iter() .map(|(value, type_)| { let value = self.resolver.value(*value)?; - let type_ = get_type(self.context, type_)?; - Ok(unsafe { LLVMBuildLoad2(self.builder, type_, value, LLVM_UNNAMED.as_ptr()) }) + let lowered_type = get_type(self.context, type_)?; + let load = unsafe { + LLVMBuildLoad2(self.builder, lowered_type, value, LLVM_UNNAMED.as_ptr()) + }; + unsafe { + LLVMSetAlignment(load, type_.layout().align() as u32); + } + Ok(load) }) .collect::, _>>()?; diff --git a/ptx/src/test/ll/activemask.ll b/ptx/src/test/ll/activemask.ll index 0da737e..89c8a97 100644 --- a/ptx/src/test/ll/activemask.ll +++ b/ptx/src/test/ll/activemask.ll @@ -9,11 +9,11 @@ define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"29", ptr add br label %"28" "28": ; preds = %1 - %"33" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"33", ptr addrspace(5) %"31", align 4 + %"33" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"33", ptr addrspace(5) %"31", align 8 %"34" = call i32 @__zluda_ptx_impl_activemask() store i32 %"34", ptr addrspace(5) %"32", align 4 - %"35" = load i64, ptr addrspace(5) %"31", align 4 + %"35" = load i64, ptr addrspace(5) %"31", align 8 %"36" = load i32, ptr addrspace(5) %"32", align 4 %"37" = inttoptr i64 %"35" to ptr store i32 %"36", ptr %"37", align 4 diff --git a/ptx/src/test/ll/add.ll b/ptx/src/test/ll/add.ll index e945f2e..e2f3289 100644 --- a/ptx/src/test/ll/add.ll +++ b/ptx/src/test/ll/add.ll @@ -9,22 +9,22 @@ define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"32", ptr addrspace( br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr - %"40" = load i64, ptr %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"42" = add i64 %"43", 1 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"47" = inttoptr i64 %"44" to ptr - store i64 %"45", ptr %"47", align 4 + store i64 %"45", ptr %"47", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/add_non_coherent.ll b/ptx/src/test/ll/add_non_coherent.ll index 00e4092..136a8f9 100644 --- a/ptx/src/test/ll/add_non_coherent.ll +++ b/ptx/src/test/ll/add_non_coherent.ll @@ -9,22 +9,22 @@ define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"32", p br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr addrspace(1) - %"40" = load i64, ptr addrspace(1) %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr addrspace(1) %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"42" = add i64 %"43", 1 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"47" = inttoptr i64 %"44" to ptr addrspace(1) - store i64 %"45", ptr addrspace(1) %"47", align 4 + store i64 %"45", ptr addrspace(1) %"47", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/add_s32_sat.ll b/ptx/src/test/ll/add_s32_sat.ll index d50ae8d..8540055 100644 --- a/ptx/src/test/ll/add_s32_sat.ll +++ b/ptx/src/test/ll/add_s32_sat.ll @@ -11,15 +11,15 @@ define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"37", ptr ad br label %"36" "36": ; preds = %1 - %"45" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"45", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(4) %"38", align 4 - store i64 %"46", ptr addrspace(5) %"40", align 4 - %"48" = load i64, ptr addrspace(5) %"39", align 4 + %"45" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"45", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"46", ptr addrspace(5) %"40", align 8 + %"48" = load i64, ptr addrspace(5) %"39", align 8 %"61" = inttoptr i64 %"48" to ptr %"47" = load i32, ptr %"61", align 4 store i32 %"47", ptr addrspace(5) %"41", align 4 - %"49" = load i64, ptr addrspace(5) %"39", align 4 + %"49" = load i64, ptr addrspace(5) %"39", align 8 %"62" = inttoptr i64 %"49" to ptr %"33" = getelementptr inbounds i8, ptr %"62", i64 4 %"50" = load i32, ptr %"33", align 4 @@ -32,11 +32,11 @@ define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"37", ptr ad %"56" = load i32, ptr addrspace(5) %"42", align 4 %"54" = add i32 %"55", %"56" store i32 %"54", ptr addrspace(5) %"44", align 4 - %"57" = load i64, ptr addrspace(5) %"40", align 4 + %"57" = load i64, ptr addrspace(5) %"40", align 8 %"58" = load i32, ptr addrspace(5) %"43", align 4 %"63" = inttoptr i64 %"57" to ptr store i32 %"58", ptr %"63", align 4 - %"59" = load i64, ptr addrspace(5) %"40", align 4 + %"59" = load i64, ptr addrspace(5) %"40", align 8 %"64" = inttoptr i64 %"59" to ptr %"35" = getelementptr inbounds i8, ptr %"64", i64 4 %"60" = load i32, ptr addrspace(5) %"44", align 4 diff --git a/ptx/src/test/ll/add_tuning.ll b/ptx/src/test/ll/add_tuning.ll index 42d2031..a381be9 100644 --- a/ptx/src/test/ll/add_tuning.ll +++ b/ptx/src/test/ll/add_tuning.ll @@ -9,22 +9,22 @@ define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"32", ptr add br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr - %"40" = load i64, ptr %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"42" = add i64 %"43", 1 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"47" = inttoptr i64 %"44" to ptr - store i64 %"45", ptr %"47", align 4 + store i64 %"45", ptr %"47", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/and.ll b/ptx/src/test/ll/and.ll index 84b720c..9c9c279 100644 --- a/ptx/src/test/ll/and.ll +++ b/ptx/src/test/ll/and.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"33", ptr addrspace( br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load i32, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"33", ptr addrspace( %"47" = load i32, ptr addrspace(5) %"38", align 4 %"52" = and i32 %"46", %"47" store i32 %"52", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load i32, ptr addrspace(5) %"37", align 4 %"55" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"55", align 4 diff --git a/ptx/src/test/ll/assertfail.ll b/ptx/src/test/ll/assertfail.ll index 50d51fe..2dfc81c 100644 --- a/ptx/src/test/ll/assertfail.ll +++ b/ptx/src/test/ll/assertfail.ll @@ -17,46 +17,46 @@ define amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"86", ptr add br label %"84" "84": ; preds = %1 - %"92" = load i64, ptr addrspace(4) %"86", align 4 - store i64 %"92", ptr addrspace(5) %"88", align 4 - %"93" = load i64, ptr addrspace(4) %"87", align 4 - store i64 %"93", ptr addrspace(5) %"89", align 4 + %"92" = load i64, ptr addrspace(4) %"86", align 8 + store i64 %"92", ptr addrspace(5) %"88", align 8 + %"93" = load i64, ptr addrspace(4) %"87", align 8 + store i64 %"93", ptr addrspace(5) %"89", align 8 store i32 0, ptr addrspace(5) %"94", align 4 %"97" = getelementptr inbounds i8, ptr addrspace(5) %"96", i64 0 - %"98" = load i64, ptr addrspace(5) %"88", align 4 - store i64 %"98", ptr addrspace(5) %"97", align 4 + %"98" = load i64, ptr addrspace(5) %"88", align 8 + store i64 %"98", ptr addrspace(5) %"97", align 8 %"100" = getelementptr inbounds i8, ptr addrspace(5) %"99", i64 0 - %"101" = load i64, ptr addrspace(5) %"88", align 4 - store i64 %"101", ptr addrspace(5) %"100", align 4 + %"101" = load i64, ptr addrspace(5) %"88", align 8 + store i64 %"101", ptr addrspace(5) %"100", align 8 %"103" = getelementptr inbounds i8, ptr addrspace(5) %"102", i64 0 %"104" = load i32, ptr addrspace(5) %"94", align 4 store i32 %"104", ptr addrspace(5) %"103", align 4 %"106" = getelementptr inbounds i8, ptr addrspace(5) %"105", i64 0 - %"107" = load i64, ptr addrspace(5) %"88", align 4 - store i64 %"107", ptr addrspace(5) %"106", align 4 + %"107" = load i64, ptr addrspace(5) %"88", align 8 + store i64 %"107", ptr addrspace(5) %"106", align 8 %"109" = getelementptr inbounds i8, ptr addrspace(5) %"108", i64 0 - %"110" = load i64, ptr addrspace(5) %"88", align 4 - store i64 %"110", ptr addrspace(5) %"109", align 4 - %"74" = load i64, ptr addrspace(5) %"96", align 4 - %"75" = load i64, ptr addrspace(5) %"99", align 4 + %"110" = load i64, ptr addrspace(5) %"88", align 8 + store i64 %"110", ptr addrspace(5) %"109", align 8 + %"74" = load i64, ptr addrspace(5) %"96", align 8 + %"75" = load i64, ptr addrspace(5) %"99", align 8 %"76" = load i32, ptr addrspace(5) %"102", align 4 - %"77" = load i64, ptr addrspace(5) %"105", align 4 - %"78" = load i64, ptr addrspace(5) %"108", align 4 + %"77" = load i64, ptr addrspace(5) %"105", align 8 + %"78" = load i64, ptr addrspace(5) %"108", align 8 call void @__zluda_ptx_impl___assertfail(i64 %"74", i64 %"75", i32 %"76", i64 %"77", i64 %"78") br label %"85" "85": ; preds = %"84" - %"112" = load i64, ptr addrspace(5) %"88", align 4 + %"112" = load i64, ptr addrspace(5) %"88", align 8 %"122" = inttoptr i64 %"112" to ptr - %"111" = load i64, ptr %"122", align 4 - store i64 %"111", ptr addrspace(5) %"90", align 4 - %"114" = load i64, ptr addrspace(5) %"90", align 4 + %"111" = load i64, ptr %"122", align 8 + store i64 %"111", ptr addrspace(5) %"90", align 8 + %"114" = load i64, ptr addrspace(5) %"90", align 8 %"113" = add i64 %"114", 1 - store i64 %"113", ptr addrspace(5) %"91", align 4 - %"115" = load i64, ptr addrspace(5) %"89", align 4 - %"116" = load i64, ptr addrspace(5) %"91", align 4 + store i64 %"113", ptr addrspace(5) %"91", align 8 + %"115" = load i64, ptr addrspace(5) %"89", align 8 + %"116" = load i64, ptr addrspace(5) %"91", align 8 %"123" = inttoptr i64 %"115" to ptr - store i64 %"116", ptr %"123", align 4 + store i64 %"116", ptr %"123", align 8 ret void } diff --git a/ptx/src/test/ll/atom_add.ll b/ptx/src/test/ll/atom_add.ll index 72a9a75..4938216 100644 --- a/ptx/src/test/ll/atom_add.ll +++ b/ptx/src/test/ll/atom_add.ll @@ -11,15 +11,15 @@ define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"36", ptr addrs br label %"35" "35": ; preds = %1 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"43" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"38", align 4 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 %"56" = inttoptr i64 %"45" to ptr %"44" = load i32, ptr %"56", align 4 store i32 %"44", ptr addrspace(5) %"40", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %"57" = inttoptr i64 %"46" to ptr %"32" = getelementptr inbounds i8, ptr %"57", i64 4 %"47" = load i32, ptr %"32", align 4 @@ -31,11 +31,11 @@ define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"36", ptr addrs store i32 %2, ptr addrspace(5) %"40", align 4 %"51" = load i32, ptr addrspace(3) @shared_mem, align 4 store i32 %"51", ptr addrspace(5) %"41", align 4 - %"52" = load i64, ptr addrspace(5) %"39", align 4 + %"52" = load i64, ptr addrspace(5) %"39", align 8 %"53" = load i32, ptr addrspace(5) %"40", align 4 %"61" = inttoptr i64 %"52" to ptr store i32 %"53", ptr %"61", align 4 - %"54" = load i64, ptr addrspace(5) %"39", align 4 + %"54" = load i64, ptr addrspace(5) %"39", align 8 %"62" = inttoptr i64 %"54" to ptr %"34" = getelementptr inbounds i8, ptr %"62", i64 4 %"55" = load i32, ptr addrspace(5) %"41", align 4 diff --git a/ptx/src/test/ll/atom_add_float.ll b/ptx/src/test/ll/atom_add_float.ll index acf9979..89cef23 100644 --- a/ptx/src/test/ll/atom_add_float.ll +++ b/ptx/src/test/ll/atom_add_float.ll @@ -11,15 +11,15 @@ define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"36", ptr br label %"35" "35": ; preds = %1 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"43" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"38", align 4 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 %"56" = inttoptr i64 %"45" to ptr %"44" = load float, ptr %"56", align 4 store float %"44", ptr addrspace(5) %"40", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %"57" = inttoptr i64 %"46" to ptr %"32" = getelementptr inbounds i8, ptr %"57", i64 4 %"47" = load float, ptr %"32", align 4 @@ -31,11 +31,11 @@ define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"36", ptr store float %2, ptr addrspace(5) %"40", align 4 %"51" = load float, ptr addrspace(3) @shared_mem, align 4 store float %"51", ptr addrspace(5) %"41", align 4 - %"52" = load i64, ptr addrspace(5) %"39", align 4 + %"52" = load i64, ptr addrspace(5) %"39", align 8 %"53" = load float, ptr addrspace(5) %"40", align 4 %"61" = inttoptr i64 %"52" to ptr store float %"53", ptr %"61", align 4 - %"54" = load i64, ptr addrspace(5) %"39", align 4 + %"54" = load i64, ptr addrspace(5) %"39", align 8 %"62" = inttoptr i64 %"54" to ptr %"34" = getelementptr inbounds i8, ptr %"62", i64 4 %"55" = load float, ptr addrspace(5) %"41", align 4 diff --git a/ptx/src/test/ll/atom_cas.ll b/ptx/src/test/ll/atom_cas.ll index 073fb62..353d5a2 100644 --- a/ptx/src/test/ll/atom_cas.ll +++ b/ptx/src/test/ll/atom_cas.ll @@ -9,31 +9,31 @@ define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"38", ptr addrs br label %"37" "37": ; preds = %1 - %"44" = load i64, ptr addrspace(4) %"38", align 4 - store i64 %"44", ptr addrspace(5) %"40", align 4 - %"45" = load i64, ptr addrspace(4) %"39", align 4 - store i64 %"45", ptr addrspace(5) %"41", align 4 - %"47" = load i64, ptr addrspace(5) %"40", align 4 + %"44" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"44", ptr addrspace(5) %"40", align 8 + %"45" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"45", ptr addrspace(5) %"41", align 8 + %"47" = load i64, ptr addrspace(5) %"40", align 8 %"57" = inttoptr i64 %"47" to ptr %"46" = load i32, ptr %"57", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4 - %"48" = load i64, ptr addrspace(5) %"40", align 4 + %"48" = load i64, ptr addrspace(5) %"40", align 8 %"58" = inttoptr i64 %"48" to ptr %"31" = getelementptr inbounds i8, ptr %"58", i64 4 %"50" = load i32, ptr addrspace(5) %"42", align 4 %2 = cmpxchg ptr %"31", i32 %"50", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 %"59" = extractvalue { i32, i1 } %2, 0 store i32 %"59", ptr addrspace(5) %"42", align 4 - %"51" = load i64, ptr addrspace(5) %"40", align 4 + %"51" = load i64, ptr addrspace(5) %"40", align 8 %"61" = inttoptr i64 %"51" to ptr %"34" = getelementptr inbounds i8, ptr %"61", i64 4 %"52" = load i32, ptr %"34", align 4 store i32 %"52", ptr addrspace(5) %"43", align 4 - %"53" = load i64, ptr addrspace(5) %"41", align 4 + %"53" = load i64, ptr addrspace(5) %"41", align 8 %"54" = load i32, ptr addrspace(5) %"42", align 4 %"62" = inttoptr i64 %"53" to ptr store i32 %"54", ptr %"62", align 4 - %"55" = load i64, ptr addrspace(5) %"41", align 4 + %"55" = load i64, ptr addrspace(5) %"41", align 8 %"63" = inttoptr i64 %"55" to ptr %"36" = getelementptr inbounds i8, ptr %"63", i64 4 %"56" = load i32, ptr addrspace(5) %"43", align 4 diff --git a/ptx/src/test/ll/atom_inc.ll b/ptx/src/test/ll/atom_inc.ll index b6906f3..7f748e2 100644 --- a/ptx/src/test/ll/atom_inc.ll +++ b/ptx/src/test/ll/atom_inc.ll @@ -10,32 +10,32 @@ define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"38", ptr addrs br label %"37" "37": ; preds = %1 - %"45" = load i64, ptr addrspace(4) %"38", align 4 - store i64 %"45", ptr addrspace(5) %"40", align 4 - %"46" = load i64, ptr addrspace(4) %"39", align 4 - store i64 %"46", ptr addrspace(5) %"41", align 4 - %"48" = load i64, ptr addrspace(5) %"40", align 4 + %"45" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"45", ptr addrspace(5) %"40", align 8 + %"46" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"46", ptr addrspace(5) %"41", align 8 + %"48" = load i64, ptr addrspace(5) %"40", align 8 %"59" = inttoptr i64 %"48" to ptr %2 = atomicrmw uinc_wrap ptr %"59", i32 101 syncscope("agent-one-as") monotonic, align 4 store i32 %2, ptr addrspace(5) %"42", align 4 - %"50" = load i64, ptr addrspace(5) %"40", align 4 + %"50" = load i64, ptr addrspace(5) %"40", align 8 %"60" = inttoptr i64 %"50" to ptr addrspace(1) %3 = atomicrmw uinc_wrap ptr addrspace(1) %"60", i32 101 syncscope("agent-one-as") monotonic, align 4 store i32 %3, ptr addrspace(5) %"43", align 4 - %"52" = load i64, ptr addrspace(5) %"40", align 4 + %"52" = load i64, ptr addrspace(5) %"40", align 8 %"61" = inttoptr i64 %"52" to ptr %"51" = load i32, ptr %"61", align 4 store i32 %"51", ptr addrspace(5) %"44", align 4 - %"53" = load i64, ptr addrspace(5) %"41", align 4 + %"53" = load i64, ptr addrspace(5) %"41", align 8 %"54" = load i32, ptr addrspace(5) %"42", align 4 %"62" = inttoptr i64 %"53" to ptr store i32 %"54", ptr %"62", align 4 - %"55" = load i64, ptr addrspace(5) %"41", align 4 + %"55" = load i64, ptr addrspace(5) %"41", align 8 %"63" = inttoptr i64 %"55" to ptr %"34" = getelementptr inbounds i8, ptr %"63", i64 4 %"56" = load i32, ptr addrspace(5) %"43", align 4 store i32 %"56", ptr %"34", align 4 - %"57" = load i64, ptr addrspace(5) %"41", align 4 + %"57" = load i64, ptr addrspace(5) %"41", align 8 %"64" = inttoptr i64 %"57" to ptr %"36" = getelementptr inbounds i8, ptr %"64", i64 8 %"58" = load i32, ptr addrspace(5) %"44", align 4 diff --git a/ptx/src/test/ll/b64tof64.ll b/ptx/src/test/ll/b64tof64.ll index ced692b..6faaa72 100644 --- a/ptx/src/test/ll/b64tof64.ll +++ b/ptx/src/test/ll/b64tof64.ll @@ -11,20 +11,20 @@ define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"31", ptr addrs "30": ; preds = %1 %"37" = load double, ptr addrspace(4) %"31", align 8 store double %"37", ptr addrspace(5) %"33", align 8 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"35", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"35", align 8 %"40" = load double, ptr addrspace(5) %"33", align 8 %"46" = bitcast double %"40" to i64 - store i64 %"46", ptr addrspace(5) %"34", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + store i64 %"46", ptr addrspace(5) %"34", align 8 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"47" = inttoptr i64 %"42" to ptr - %"41" = load i64, ptr %"47", align 4 - store i64 %"41", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + %"41" = load i64, ptr %"47", align 8 + store i64 %"41", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"35", align 8 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"48" = inttoptr i64 %"43" to ptr - store i64 %"44", ptr %"48", align 4 + store i64 %"44", ptr %"48", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/bar_red_and_pred.ll b/ptx/src/test/ll/bar_red_and_pred.ll index 649efc0..d2ce83a 100644 --- a/ptx/src/test/ll/bar_red_and_pred.ll +++ b/ptx/src/test/ll/bar_red_and_pred.ll @@ -18,8 +18,8 @@ define amdgpu_kernel void @bar_red_and_pred(ptr addrspace(4) byref(i64) %"73", p br label %"70" "70": ; preds = %1 - %"82" = load i64, ptr addrspace(4) %"74", align 4 - store i64 %"82", ptr addrspace(5) %"75", align 4 + %"82" = load i64, ptr addrspace(4) %"74", align 8 + store i64 %"82", ptr addrspace(5) %"75", align 8 %"44" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) br label %"71" @@ -102,15 +102,15 @@ define amdgpu_kernel void @bar_red_and_pred(ptr addrspace(4) byref(i64) %"73", p "26": ; preds = %"25", %"24" %"118" = load i32, ptr addrspace(5) %"77", align 4 %"117" = zext i32 %"118" to i64 - store i64 %"117", ptr addrspace(5) %"76", align 4 - %"120" = load i64, ptr addrspace(5) %"76", align 4 + store i64 %"117", ptr addrspace(5) %"76", align 8 + %"120" = load i64, ptr addrspace(5) %"76", align 8 %"119" = mul i64 %"120", 4 - store i64 %"119", ptr addrspace(5) %"76", align 4 - %"122" = load i64, ptr addrspace(5) %"75", align 4 - %"123" = load i64, ptr addrspace(5) %"76", align 4 + store i64 %"119", ptr addrspace(5) %"76", align 8 + %"122" = load i64, ptr addrspace(5) %"75", align 8 + %"123" = load i64, ptr addrspace(5) %"76", align 8 %"121" = add i64 %"122", %"123" - store i64 %"121", ptr addrspace(5) %"75", align 4 - %"124" = load i64, ptr addrspace(5) %"75", align 4 + store i64 %"121", ptr addrspace(5) %"75", align 8 + %"124" = load i64, ptr addrspace(5) %"75", align 8 %"125" = load i32, ptr addrspace(5) %"81", align 4 %"126" = inttoptr i64 %"124" to ptr store i32 %"125", ptr %"126", align 4 diff --git a/ptx/src/test/ll/bfe.ll b/ptx/src/test/ll/bfe.ll index 8544b99..d5393fd 100644 --- a/ptx/src/test/ll/bfe.ll +++ b/ptx/src/test/ll/bfe.ll @@ -12,20 +12,20 @@ define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"36", ptr addrspace( br label %"35" "35": ; preds = %1 - %"43" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"43", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"44", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"43" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"43", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"44", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %"57" = inttoptr i64 %"46" to ptr %"45" = load i32, ptr %"57", align 4 store i32 %"45", ptr addrspace(5) %"40", align 4 - %"47" = load i64, ptr addrspace(5) %"38", align 4 + %"47" = load i64, ptr addrspace(5) %"38", align 8 %"58" = inttoptr i64 %"47" to ptr %"32" = getelementptr inbounds i8, ptr %"58", i64 4 %"48" = load i32, ptr %"32", align 4 store i32 %"48", ptr addrspace(5) %"41", align 4 - %"49" = load i64, ptr addrspace(5) %"38", align 4 + %"49" = load i64, ptr addrspace(5) %"38", align 8 %"59" = inttoptr i64 %"49" to ptr %"34" = getelementptr inbounds i8, ptr %"59", i64 8 %"50" = load i32, ptr %"34", align 4 @@ -35,7 +35,7 @@ define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"36", ptr addrspace( %"54" = load i32, ptr addrspace(5) %"42", align 4 %"51" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"52", i32 %"53", i32 %"54") store i32 %"51", ptr addrspace(5) %"40", align 4 - %"55" = load i64, ptr addrspace(5) %"39", align 4 + %"55" = load i64, ptr addrspace(5) %"39", align 8 %"56" = load i32, ptr addrspace(5) %"40", align 4 %"60" = inttoptr i64 %"55" to ptr store i32 %"56", ptr %"60", align 4 diff --git a/ptx/src/test/ll/bfi.ll b/ptx/src/test/ll/bfi.ll index 43b09f2..2938df8 100644 --- a/ptx/src/test/ll/bfi.ll +++ b/ptx/src/test/ll/bfi.ll @@ -13,25 +13,25 @@ define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"39", ptr addrspace( br label %"38" "38": ; preds = %1 - %"47" = load i64, ptr addrspace(4) %"39", align 4 - store i64 %"47", ptr addrspace(5) %"41", align 4 - %"48" = load i64, ptr addrspace(4) %"40", align 4 - store i64 %"48", ptr addrspace(5) %"42", align 4 - %"50" = load i64, ptr addrspace(5) %"41", align 4 + %"47" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"47", ptr addrspace(5) %"41", align 8 + %"48" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"48", ptr addrspace(5) %"42", align 8 + %"50" = load i64, ptr addrspace(5) %"41", align 8 %"64" = inttoptr i64 %"50" to ptr %"49" = load i32, ptr %"64", align 4 store i32 %"49", ptr addrspace(5) %"43", align 4 - %"51" = load i64, ptr addrspace(5) %"41", align 4 + %"51" = load i64, ptr addrspace(5) %"41", align 8 %"65" = inttoptr i64 %"51" to ptr %"33" = getelementptr inbounds i8, ptr %"65", i64 4 %"52" = load i32, ptr %"33", align 4 store i32 %"52", ptr addrspace(5) %"44", align 4 - %"53" = load i64, ptr addrspace(5) %"41", align 4 + %"53" = load i64, ptr addrspace(5) %"41", align 8 %"66" = inttoptr i64 %"53" to ptr %"35" = getelementptr inbounds i8, ptr %"66", i64 8 %"54" = load i32, ptr %"35", align 4 store i32 %"54", ptr addrspace(5) %"45", align 4 - %"55" = load i64, ptr addrspace(5) %"41", align 4 + %"55" = load i64, ptr addrspace(5) %"41", align 8 %"67" = inttoptr i64 %"55" to ptr %"37" = getelementptr inbounds i8, ptr %"67", i64 12 %"56" = load i32, ptr %"37", align 4 @@ -42,7 +42,7 @@ define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"39", ptr addrspace( %"61" = load i32, ptr addrspace(5) %"46", align 4 %"68" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"58", i32 %"59", i32 %"60", i32 %"61") store i32 %"68", ptr addrspace(5) %"43", align 4 - %"62" = load i64, ptr addrspace(5) %"42", align 4 + %"62" = load i64, ptr addrspace(5) %"42", align 8 %"63" = load i32, ptr addrspace(5) %"43", align 4 %"71" = inttoptr i64 %"62" to ptr store i32 %"63", ptr %"71", align 4 diff --git a/ptx/src/test/ll/block.ll b/ptx/src/test/ll/block.ll index b492a5a..b0b8e10 100644 --- a/ptx/src/test/ll/block.ll +++ b/ptx/src/test/ll/block.ll @@ -10,25 +10,25 @@ define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"34", ptr addrspac br label %"33" "33": ; preds = %1 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"51" = inttoptr i64 %"43" to ptr - %"42" = load i64, ptr %"51", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"45" = load i64, ptr addrspace(5) %"38", align 4 + %"42" = load i64, ptr %"51", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 %"44" = add i64 %"45", 1 - store i64 %"44", ptr addrspace(5) %"39", align 4 - %"48" = load i64, ptr addrspace(5) %"46", align 4 + store i64 %"44", ptr addrspace(5) %"39", align 8 + %"48" = load i64, ptr addrspace(5) %"46", align 8 %"47" = add i64 %"48", 1 - store i64 %"47", ptr addrspace(5) %"46", align 4 - %"49" = load i64, ptr addrspace(5) %"37", align 4 - %"50" = load i64, ptr addrspace(5) %"39", align 4 + store i64 %"47", ptr addrspace(5) %"46", align 8 + %"49" = load i64, ptr addrspace(5) %"37", align 8 + %"50" = load i64, ptr addrspace(5) %"39", align 8 %"52" = inttoptr i64 %"49" to ptr - store i64 %"50", ptr %"52", align 4 + store i64 %"50", ptr %"52", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/bra.ll b/ptx/src/test/ll/bra.ll index 3246790..6fe4d6d 100644 --- a/ptx/src/test/ll/bra.ll +++ b/ptx/src/test/ll/bra.ll @@ -9,28 +9,28 @@ define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"36", ptr addrspace( br label %"35" "35": ; preds = %1 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"43" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"38", align 4 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"43" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"43", ptr addrspace(5) %"39", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 %"50" = inttoptr i64 %"45" to ptr - %"44" = load i64, ptr %"50", align 4 - store i64 %"44", ptr addrspace(5) %"40", align 4 + %"44" = load i64, ptr %"50", align 8 + store i64 %"44", ptr addrspace(5) %"40", align 8 br label %"10" "10": ; preds = %"35" - %"47" = load i64, ptr addrspace(5) %"40", align 4 + %"47" = load i64, ptr addrspace(5) %"40", align 8 %"46" = add i64 %"47", 1 - store i64 %"46", ptr addrspace(5) %"41", align 4 + store i64 %"46", ptr addrspace(5) %"41", align 8 br label %"12" "12": ; preds = %"10" - %"48" = load i64, ptr addrspace(5) %"39", align 4 - %"49" = load i64, ptr addrspace(5) %"41", align 4 + %"48" = load i64, ptr addrspace(5) %"39", align 8 + %"49" = load i64, ptr addrspace(5) %"41", align 8 %"51" = inttoptr i64 %"48" to ptr - store i64 %"49", ptr %"51", align 4 + store i64 %"49", ptr %"51", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/brev.ll b/ptx/src/test/ll/brev.ll index 9126fc4..35faae2 100644 --- a/ptx/src/test/ll/brev.ll +++ b/ptx/src/test/ll/brev.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"30", ptr addrspace br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load i32, ptr %"43", align 4 store i32 %"37", ptr addrspace(5) %"34", align 4 %"40" = load i32, ptr addrspace(5) %"34", align 4 %"39" = call i32 @llvm.bitreverse.i32(i32 %"40") store i32 %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load i32, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store i32 %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/call.ll b/ptx/src/test/ll/call.ll index 09b68c9..094b2b8 100644 --- a/ptx/src/test/ll/call.ll +++ b/ptx/src/test/ll/call.ll @@ -9,17 +9,17 @@ define i64 @incr(i64 %"43") #0 { br label %"46" "46": ; preds = %1 - store i64 %"43", ptr addrspace(5) %"65", align 4 - %"67" = load i64, ptr addrspace(5) %"65", align 4 - store i64 %"67", ptr addrspace(5) %"66", align 4 - %"69" = load i64, ptr addrspace(5) %"66", align 4 + store i64 %"43", ptr addrspace(5) %"65", align 8 + %"67" = load i64, ptr addrspace(5) %"65", align 8 + store i64 %"67", ptr addrspace(5) %"66", align 8 + %"69" = load i64, ptr addrspace(5) %"66", align 8 %"68" = add i64 %"69", 1 - store i64 %"68", ptr addrspace(5) %"66", align 4 - %"70" = load i64, ptr addrspace(5) %"66", align 4 - store i64 %"70", ptr addrspace(5) %"64", align 4 - %"71" = load i64, ptr addrspace(5) %"64", align 4 - store i64 %"71", ptr addrspace(5) %"63", align 4 - %2 = load i64, ptr addrspace(5) %"63", align 4 + store i64 %"68", ptr addrspace(5) %"66", align 8 + %"70" = load i64, ptr addrspace(5) %"66", align 8 + store i64 %"70", ptr addrspace(5) %"64", align 8 + %"71" = load i64, ptr addrspace(5) %"64", align 8 + store i64 %"71", ptr addrspace(5) %"63", align 8 + %2 = load i64, ptr addrspace(5) %"63", align 8 ret i64 %2 } @@ -35,30 +35,30 @@ define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"48", ptr addrspace br label %"44" "44": ; preds = %1 - %"53" = load i64, ptr addrspace(4) %"48", align 4 - store i64 %"53", ptr addrspace(5) %"50", align 4 - %"54" = load i64, ptr addrspace(4) %"49", align 4 - store i64 %"54", ptr addrspace(5) %"51", align 4 - %"56" = load i64, ptr addrspace(5) %"50", align 4 + %"53" = load i64, ptr addrspace(4) %"48", align 8 + store i64 %"53", ptr addrspace(5) %"50", align 8 + %"54" = load i64, ptr addrspace(4) %"49", align 8 + store i64 %"54", ptr addrspace(5) %"51", align 8 + %"56" = load i64, ptr addrspace(5) %"50", align 8 %"72" = inttoptr i64 %"56" to ptr addrspace(1) - %"55" = load i64, ptr addrspace(1) %"72", align 4 - store i64 %"55", ptr addrspace(5) %"52", align 4 - %"59" = load i64, ptr addrspace(5) %"52", align 4 - store i64 %"59", ptr addrspace(5) %"57", align 4 - %"40" = load i64, ptr addrspace(5) %"57", align 4 + %"55" = load i64, ptr addrspace(1) %"72", align 8 + store i64 %"55", ptr addrspace(5) %"52", align 8 + %"59" = load i64, ptr addrspace(5) %"52", align 8 + store i64 %"59", ptr addrspace(5) %"57", align 8 + %"40" = load i64, ptr addrspace(5) %"57", align 8 %"41" = call i64 @incr(i64 %"40") br label %"45" "45": ; preds = %"44" - store i64 %"41", ptr addrspace(5) %"58", align 4 - %"60" = load i64, ptr addrspace(5) %"58", align 4 - store i64 %"60", ptr addrspace(5) %"52", align 4 - %"61" = load i64, ptr addrspace(5) %"51", align 4 - %"62" = load i64, ptr addrspace(5) %"52", align 4 + store i64 %"41", ptr addrspace(5) %"58", align 8 + %"60" = load i64, ptr addrspace(5) %"58", align 8 + store i64 %"60", ptr addrspace(5) %"52", align 8 + %"61" = load i64, ptr addrspace(5) %"51", align 8 + %"62" = load i64, ptr addrspace(5) %"52", align 8 %"75" = inttoptr i64 %"61" to ptr addrspace(1) - store i64 %"62", ptr addrspace(1) %"75", align 4 + store i64 %"62", ptr addrspace(1) %"75", align 8 ret void } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/call_rnd.ll b/ptx/src/test/ll/call_rnd.ll index b727812..22a3d40 100644 --- a/ptx/src/test/ll/call_rnd.ll +++ b/ptx/src/test/ll/call_rnd.ll @@ -86,25 +86,25 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrs "84": ; preds = %1 call void @llvm.amdgcn.s.setreg(i32 6145, i32 1) - %"108" = load i64, ptr addrspace(4) %"92", align 4 - store i64 %"108", ptr addrspace(5) %"94", align 4 - %"109" = load i64, ptr addrspace(4) %"93", align 4 - store i64 %"109", ptr addrspace(5) %"95", align 4 - %"111" = load i64, ptr addrspace(5) %"94", align 4 + %"108" = load i64, ptr addrspace(4) %"92", align 8 + store i64 %"108", ptr addrspace(5) %"94", align 8 + %"109" = load i64, ptr addrspace(4) %"93", align 8 + store i64 %"109", ptr addrspace(5) %"95", align 8 + %"111" = load i64, ptr addrspace(5) %"94", align 8 %"154" = inttoptr i64 %"111" to ptr %"110" = load float, ptr %"154", align 4 store float %"110", ptr addrspace(5) %"96", align 4 - %"112" = load i64, ptr addrspace(5) %"94", align 4 + %"112" = load i64, ptr addrspace(5) %"94", align 8 %"155" = inttoptr i64 %"112" to ptr %"59" = getelementptr inbounds i8, ptr %"155", i64 4 %"113" = load float, ptr %"59", align 4 store float %"113", ptr addrspace(5) %"97", align 4 - %"114" = load i64, ptr addrspace(5) %"94", align 4 + %"114" = load i64, ptr addrspace(5) %"94", align 8 %"156" = inttoptr i64 %"114" to ptr %"61" = getelementptr inbounds i8, ptr %"156", i64 8 %"115" = load float, ptr %"61", align 4 store float %"115", ptr addrspace(5) %"98", align 4 - %"116" = load i64, ptr addrspace(5) %"94", align 4 + %"116" = load i64, ptr addrspace(5) %"94", align 8 %"157" = inttoptr i64 %"116" to ptr %"63" = getelementptr inbounds i8, ptr %"157", i64 12 %"117" = load float, ptr %"63", align 4 @@ -122,7 +122,7 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrs store float %"74", ptr addrspace(5) %"104", align 4 %"120" = load float, ptr addrspace(5) %"104", align 4 store float %"120", ptr addrspace(5) %"100", align 4 - %"121" = load i64, ptr addrspace(5) %"95", align 4 + %"121" = load i64, ptr addrspace(5) %"95", align 8 %"122" = load float, ptr addrspace(5) %"100", align 4 %"158" = inttoptr i64 %"121" to ptr store float %"122", ptr %"158", align 4 @@ -139,7 +139,7 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrs store float %"77", ptr addrspace(5) %"107", align 4 %"125" = load float, ptr addrspace(5) %"107", align 4 store float %"125", ptr addrspace(5) %"101", align 4 - %"126" = load i64, ptr addrspace(5) %"95", align 4 + %"126" = load i64, ptr addrspace(5) %"95", align 8 %"159" = inttoptr i64 %"126" to ptr %"65" = getelementptr inbounds i8, ptr %"159", i64 4 %"127" = load float, ptr addrspace(5) %"101", align 4 diff --git a/ptx/src/test/ll/clz.ll b/ptx/src/test/ll/clz.ll index e9ff3e5..2cb5066 100644 --- a/ptx/src/test/ll/clz.ll +++ b/ptx/src/test/ll/clz.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load i32, ptr %"43", align 4 store i32 %"37", ptr addrspace(5) %"34", align 4 %"40" = load i32, ptr addrspace(5) %"34", align 4 %"44" = call i32 @llvm.ctlz.i32(i32 %"40", i1 false) store i32 %"44", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load i32, ptr addrspace(5) %"34", align 4 %"45" = inttoptr i64 %"41" to ptr store i32 %"42", ptr %"45", align 4 diff --git a/ptx/src/test/ll/const.ll b/ptx/src/test/ll/const.ll index ec65d1f..527b778 100644 --- a/ptx/src/test/ll/const.ll +++ b/ptx/src/test/ll/const.ll @@ -13,10 +13,10 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"46", ptr addrspac br label %"45" "45": ; preds = %1 - %"54" = load i64, ptr addrspace(4) %"46", align 4 - store i64 %"54", ptr addrspace(5) %"48", align 4 - %"55" = load i64, ptr addrspace(4) %"47", align 4 - store i64 %"55", ptr addrspace(5) %"49", align 4 + %"54" = load i64, ptr addrspace(4) %"46", align 8 + store i64 %"54", ptr addrspace(5) %"48", align 8 + %"55" = load i64, ptr addrspace(4) %"47", align 8 + store i64 %"55", ptr addrspace(5) %"49", align 8 %"56" = load i16, ptr addrspace(4) @constparams, align 2 store i16 %"56", ptr addrspace(5) %"50", align 2 %"57" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2 @@ -25,21 +25,21 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"46", ptr addrspac store i16 %"58", ptr addrspace(5) %"52", align 2 %"59" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2 store i16 %"59", ptr addrspace(5) %"53", align 2 - %"60" = load i64, ptr addrspace(5) %"49", align 4 + %"60" = load i64, ptr addrspace(5) %"49", align 8 %"61" = load i16, ptr addrspace(5) %"50", align 2 %"72" = inttoptr i64 %"60" to ptr store i16 %"61", ptr %"72", align 2 - %"62" = load i64, ptr addrspace(5) %"49", align 4 + %"62" = load i64, ptr addrspace(5) %"49", align 8 %"74" = inttoptr i64 %"62" to ptr %"40" = getelementptr inbounds i8, ptr %"74", i64 2 %"63" = load i16, ptr addrspace(5) %"51", align 2 store i16 %"63", ptr %"40", align 2 - %"64" = load i64, ptr addrspace(5) %"49", align 4 + %"64" = load i64, ptr addrspace(5) %"49", align 8 %"76" = inttoptr i64 %"64" to ptr %"42" = getelementptr inbounds i8, ptr %"76", i64 4 %"65" = load i16, ptr addrspace(5) %"52", align 2 store i16 %"65", ptr %"42", align 2 - %"66" = load i64, ptr addrspace(5) %"49", align 4 + %"66" = load i64, ptr addrspace(5) %"49", align 8 %"78" = inttoptr i64 %"66" to ptr %"44" = getelementptr inbounds i8, ptr %"78", i64 6 %"67" = load i16, ptr addrspace(5) %"53", align 2 @@ -47,4 +47,4 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"46", ptr addrspac ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/constant_f32.ll b/ptx/src/test/ll/constant_f32.ll index b1c04a4..4b36f04 100644 --- a/ptx/src/test/ll/constant_f32.ll +++ b/ptx/src/test/ll/constant_f32.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"31", ptr a br label %"30" "30": ; preds = %1 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"44" = inttoptr i64 %"39" to ptr %"38" = load float, ptr %"44", align 4 store float %"38", ptr addrspace(5) %"35", align 4 %"41" = load float, ptr addrspace(5) %"35", align 4 %"40" = fmul float %"41", 5.000000e-01 store float %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"43" = load float, ptr addrspace(5) %"35", align 4 %"45" = inttoptr i64 %"42" to ptr store float %"43", ptr %"45", align 4 diff --git a/ptx/src/test/ll/constant_negative.ll b/ptx/src/test/ll/constant_negative.ll index 9fec04d..c10583c 100644 --- a/ptx/src/test/ll/constant_negative.ll +++ b/ptx/src/test/ll/constant_negative.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"31", br label %"30" "30": ; preds = %1 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"44" = inttoptr i64 %"39" to ptr %"38" = load i32, ptr %"44", align 4 store i32 %"38", ptr addrspace(5) %"35", align 4 %"41" = load i32, ptr addrspace(5) %"35", align 4 %"40" = mul i32 %"41", -1 store i32 %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"43" = load i32, ptr addrspace(5) %"35", align 4 %"45" = inttoptr i64 %"42" to ptr store i32 %"43", ptr %"45", align 4 diff --git a/ptx/src/test/ll/cos.ll b/ptx/src/test/ll/cos.ll index bd1b5cb..0a7f934 100644 --- a/ptx/src/test/ll/cos.ll +++ b/ptx/src/test/ll/cos.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load float, ptr %"43", align 4 store float %"37", ptr addrspace(5) %"34", align 4 %"40" = load float, ptr addrspace(5) %"34", align 4 %"39" = call afn float @llvm.cos.f32(float %"40") store float %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load float, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store float %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/cvt_f64_f32.ll b/ptx/src/test/ll/cvt_f64_f32.ll index d7360da..84e0d4d 100644 --- a/ptx/src/test/ll/cvt_f64_f32.ll +++ b/ptx/src/test/ll/cvt_f64_f32.ll @@ -9,22 +9,22 @@ define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"31", ptr ad br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"45" = inttoptr i64 %"40" to ptr addrspace(1) %"39" = load float, ptr addrspace(1) %"45", align 4 store float %"39", ptr addrspace(5) %"35", align 4 %"42" = load float, ptr addrspace(5) %"35", align 4 %"41" = fpext float %"42" to double store double %"41", ptr addrspace(5) %"36", align 8 - %"43" = load i64, ptr addrspace(5) %"34", align 4 + %"43" = load i64, ptr addrspace(5) %"34", align 8 %"44" = load double, ptr addrspace(5) %"36", align 8 %"46" = inttoptr i64 %"43" to ptr store double %"44", ptr %"46", align 8 ret void } -attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } \ No newline at end of file +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } diff --git a/ptx/src/test/ll/cvt_rni.ll b/ptx/src/test/ll/cvt_rni.ll index 888997d..b5f5af8 100644 --- a/ptx/src/test/ll/cvt_rni.ll +++ b/ptx/src/test/ll/cvt_rni.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"35", ptr addrsp br label %"34" "34": ; preds = %1 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(5) %"37", align 4 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(5) %"37", align 8 %"55" = inttoptr i64 %"44" to ptr %"43" = load float, ptr %"55", align 4 store float %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"56" = inttoptr i64 %"45" to ptr %"31" = getelementptr inbounds i8, ptr %"56", i64 4 %"46" = load float, ptr %"31", align 4 @@ -30,11 +30,11 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"35", ptr addrsp %3 = call float @llvm.roundeven.f32(float %"50") %"49" = freeze float %3 store float %"49", ptr addrspace(5) %"40", align 4 - %"51" = load i64, ptr addrspace(5) %"38", align 4 + %"51" = load i64, ptr addrspace(5) %"38", align 8 %"52" = load float, ptr addrspace(5) %"39", align 4 %"57" = inttoptr i64 %"51" to ptr store float %"52", ptr %"57", align 4 - %"53" = load i64, ptr addrspace(5) %"38", align 4 + %"53" = load i64, ptr addrspace(5) %"38", align 8 %"58" = inttoptr i64 %"53" to ptr %"33" = getelementptr inbounds i8, ptr %"58", i64 4 %"54" = load float, ptr addrspace(5) %"40", align 4 @@ -46,4 +46,4 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"35", ptr addrsp declare float @llvm.roundeven.f32(float) #1 attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/ptx/src/test/ll/cvt_rni_u16_f32.ll b/ptx/src/test/ll/cvt_rni_u16_f32.ll index 7b66751..c93f630 100644 --- a/ptx/src/test/ll/cvt_rni_u16_f32.ll +++ b/ptx/src/test/ll/cvt_rni_u16_f32.ll @@ -9,11 +9,11 @@ define amdgpu_kernel void @cvt_rni_u16_f32(ptr addrspace(4) byref(i64) %"31", pt br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"45" = inttoptr i64 %"40" to ptr addrspace(1) %"39" = load float, ptr addrspace(1) %"45", align 4 store float %"39", ptr addrspace(5) %"35", align 4 @@ -21,7 +21,7 @@ define amdgpu_kernel void @cvt_rni_u16_f32(ptr addrspace(4) byref(i64) %"31", pt %2 = call float @llvm.roundeven.f32(float %"42") %"41" = call i16 @llvm.fptoui.sat.i16.f32(float %2) store i16 %"41", ptr addrspace(5) %"36", align 2 - %"43" = load i64, ptr addrspace(5) %"34", align 4 + %"43" = load i64, ptr addrspace(5) %"34", align 8 %"44" = load i16, ptr addrspace(5) %"36", align 2 %"46" = inttoptr i64 %"43" to ptr store i16 %"44", ptr %"46", align 2 diff --git a/ptx/src/test/ll/cvt_rzi.ll b/ptx/src/test/ll/cvt_rzi.ll index 70019e1..a5c996a 100644 --- a/ptx/src/test/ll/cvt_rzi.ll +++ b/ptx/src/test/ll/cvt_rzi.ll @@ -10,15 +10,15 @@ define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"35", ptr addrsp "34": ; preds = %1 call void @llvm.amdgcn.s.setreg(i32 6145, i32 3) - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(5) %"37", align 4 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(5) %"37", align 8 %"55" = inttoptr i64 %"44" to ptr %"43" = load float, ptr %"55", align 4 store float %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"56" = inttoptr i64 %"45" to ptr %"31" = getelementptr inbounds i8, ptr %"56", i64 4 %"46" = load float, ptr %"31", align 4 @@ -31,11 +31,11 @@ define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"35", ptr addrsp %3 = call float @llvm.trunc.f32(float %"50") %"49" = freeze float %3 store float %"49", ptr addrspace(5) %"40", align 4 - %"51" = load i64, ptr addrspace(5) %"38", align 4 + %"51" = load i64, ptr addrspace(5) %"38", align 8 %"52" = load float, ptr addrspace(5) %"39", align 4 %"57" = inttoptr i64 %"51" to ptr store float %"52", ptr %"57", align 4 - %"53" = load i64, ptr addrspace(5) %"38", align 4 + %"53" = load i64, ptr addrspace(5) %"38", align 8 %"58" = inttoptr i64 %"53" to ptr %"33" = getelementptr inbounds i8, ptr %"58", i64 4 %"54" = load float, ptr addrspace(5) %"40", align 4 @@ -51,4 +51,4 @@ declare float @llvm.trunc.f32(float) #2 attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind willreturn } -attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/ptx/src/test/ll/cvt_s16_s8.ll b/ptx/src/test/ll/cvt_s16_s8.ll index c199eee..fd4dc61 100644 --- a/ptx/src/test/ll/cvt_s16_s8.ll +++ b/ptx/src/test/ll/cvt_s16_s8.ll @@ -9,11 +9,11 @@ define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"31", ptr add br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"45" = inttoptr i64 %"40" to ptr addrspace(1) %"39" = load i32, ptr addrspace(1) %"45", align 4 store i32 %"39", ptr addrspace(5) %"36", align 4 @@ -22,7 +22,7 @@ define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"31", ptr add %"46" = sext i8 %2 to i16 %"41" = sext i16 %"46" to i32 store i32 %"41", ptr addrspace(5) %"35", align 4 - %"43" = load i64, ptr addrspace(5) %"34", align 4 + %"43" = load i64, ptr addrspace(5) %"34", align 8 %"44" = load i32, ptr addrspace(5) %"35", align 4 %"48" = inttoptr i64 %"43" to ptr store i32 %"44", ptr %"48", align 4 diff --git a/ptx/src/test/ll/cvt_s32_f32.ll b/ptx/src/test/ll/cvt_s32_f32.ll index 196f067..c3c69b3 100644 --- a/ptx/src/test/ll/cvt_s32_f32.ll +++ b/ptx/src/test/ll/cvt_s32_f32.ll @@ -9,16 +9,16 @@ define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"35", ptr ad br label %"34" "34": ; preds = %1 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(5) %"37", align 4 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(5) %"37", align 8 %"56" = inttoptr i64 %"44" to ptr %"55" = load float, ptr %"56", align 4 %"43" = bitcast float %"55" to i32 store i32 %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"57" = inttoptr i64 %"45" to ptr %"31" = getelementptr inbounds i8, ptr %"57", i64 4 %"58" = load float, ptr %"31", align 4 @@ -36,11 +36,11 @@ define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"35", ptr ad %5 = fptosi float %4 to i32 %"61" = freeze i32 %5 store i32 %"61", ptr addrspace(5) %"40", align 4 - %"51" = load i64, ptr addrspace(5) %"38", align 4 + %"51" = load i64, ptr addrspace(5) %"38", align 8 %"52" = load i32, ptr addrspace(5) %"39", align 4 %"63" = inttoptr i64 %"51" to ptr addrspace(1) store i32 %"52", ptr addrspace(1) %"63", align 4 - %"53" = load i64, ptr addrspace(5) %"38", align 4 + %"53" = load i64, ptr addrspace(5) %"38", align 8 %"65" = inttoptr i64 %"53" to ptr addrspace(1) %"33" = getelementptr inbounds i8, ptr addrspace(1) %"65", i64 4 %"54" = load i32, ptr addrspace(5) %"40", align 4 diff --git a/ptx/src/test/ll/cvt_s64_s32.ll b/ptx/src/test/ll/cvt_s64_s32.ll index d1c6c83..d5ad9e5 100644 --- a/ptx/src/test/ll/cvt_s64_s32.ll +++ b/ptx/src/test/ll/cvt_s64_s32.ll @@ -9,21 +9,21 @@ define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"31", ptr ad br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"46" = inttoptr i64 %"40" to ptr %"45" = load i32, ptr %"46", align 4 store i32 %"45", ptr addrspace(5) %"35", align 4 %"42" = load i32, ptr addrspace(5) %"35", align 4 %"41" = sext i32 %"42" to i64 - store i64 %"41", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"34", align 4 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + store i64 %"41", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"34", align 8 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"47" = inttoptr i64 %"43" to ptr - store i64 %"44", ptr %"47", align 4 + store i64 %"44", ptr %"47", align 8 ret void } diff --git a/ptx/src/test/ll/cvt_sat_s_u.ll b/ptx/src/test/ll/cvt_sat_s_u.ll index 68ff04b..6b31407 100644 --- a/ptx/src/test/ll/cvt_sat_s_u.ll +++ b/ptx/src/test/ll/cvt_sat_s_u.ll @@ -10,11 +10,11 @@ define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"32", ptr ad br label %"31" "31": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"39", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"39" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"39", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"40", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"49" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"49", align 4 store i32 %"41", ptr addrspace(5) %"36", align 4 @@ -24,7 +24,7 @@ define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"32", ptr ad store i32 %3, ptr addrspace(5) %"37", align 4 %"46" = load i32, ptr addrspace(5) %"37", align 4 store i32 %"46", ptr addrspace(5) %"38", align 4 - %"47" = load i64, ptr addrspace(5) %"35", align 4 + %"47" = load i64, ptr addrspace(5) %"35", align 8 %"48" = load i32, ptr addrspace(5) %"38", align 4 %"50" = inttoptr i64 %"47" to ptr store i32 %"48", ptr %"50", align 4 @@ -38,4 +38,4 @@ declare i32 @llvm.smax.i32(i32, i32) #1 declare i32 @llvm.umin.i32(i32, i32) #1 attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } \ No newline at end of file +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/ptx/src/test/ll/cvta.ll b/ptx/src/test/ll/cvta.ll index 7bbbbfb..0a89e2a 100644 --- a/ptx/src/test/ll/cvta.ll +++ b/ptx/src/test/ll/cvta.ll @@ -8,23 +8,23 @@ define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"30", ptr addrspace br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %2 = inttoptr i64 %"38" to ptr %"45" = addrspacecast ptr %2 to ptr addrspace(1) store ptr addrspace(1) %"45", ptr addrspace(5) %"32", align 8 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %3 = inttoptr i64 %"40" to ptr %"47" = addrspacecast ptr %3 to ptr addrspace(1) store ptr addrspace(1) %"47", ptr addrspace(5) %"33", align 8 - %"42" = load i64, ptr addrspace(5) %"32", align 4 + %"42" = load i64, ptr addrspace(5) %"32", align 8 %"49" = inttoptr i64 %"42" to ptr addrspace(1) %"41" = load float, ptr addrspace(1) %"49", align 4 store float %"41", ptr addrspace(5) %"34", align 4 - %"43" = load i64, ptr addrspace(5) %"33", align 4 + %"43" = load i64, ptr addrspace(5) %"33", align 8 %"44" = load float, ptr addrspace(5) %"34", align 4 %"50" = inttoptr i64 %"43" to ptr addrspace(1) store float %"44", ptr addrspace(1) %"50", align 4 diff --git a/ptx/src/test/ll/div_approx.ll b/ptx/src/test/ll/div_approx.ll index 8498e78..84b0f2b 100644 --- a/ptx/src/test/ll/div_approx.ll +++ b/ptx/src/test/ll/div_approx.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"33", ptr add br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load float, ptr %"50", align 4 store float %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load float, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"33", ptr add %"47" = load float, ptr addrspace(5) %"38", align 4 %"45" = fdiv arcp afn float %"46", %"47" store float %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load float, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store float %"49", ptr %"52", align 4 diff --git a/ptx/src/test/ll/ex2.ll b/ptx/src/test/ll/ex2.ll index f883ad4..1d51761 100644 --- a/ptx/src/test/ll/ex2.ll +++ b/ptx/src/test/ll/ex2.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load float, ptr %"43", align 4 store float %"37", ptr addrspace(5) %"34", align 4 %"40" = load float, ptr addrspace(5) %"34", align 4 %"39" = call float @llvm.amdgcn.exp2.f32(float %"40") store float %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load float, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store float %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/extern_func.ll b/ptx/src/test/ll/extern_func.ll index 5d11365..24fe9ba 100644 --- a/ptx/src/test/ll/extern_func.ll +++ b/ptx/src/test/ll/extern_func.ll @@ -13,29 +13,29 @@ define amdgpu_kernel void @extern_func(ptr addrspace(4) byref(i64) %"44", ptr ad br label %"41" "41": ; preds = %1 - %"50" = load i64, ptr addrspace(4) %"44", align 4 - store i64 %"50", ptr addrspace(5) %"46", align 4 - %"51" = load i64, ptr addrspace(4) %"45", align 4 - store i64 %"51", ptr addrspace(5) %"47", align 4 - %"53" = load i64, ptr addrspace(5) %"46", align 4 + %"50" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"50", ptr addrspace(5) %"46", align 8 + %"51" = load i64, ptr addrspace(4) %"45", align 8 + store i64 %"51", ptr addrspace(5) %"47", align 8 + %"53" = load i64, ptr addrspace(5) %"46", align 8 %"61" = inttoptr i64 %"53" to ptr addrspace(1) - %"52" = load i64, ptr addrspace(1) %"61", align 4 - store i64 %"52", ptr addrspace(5) %"48", align 4 + %"52" = load i64, ptr addrspace(1) %"61", align 8 + store i64 %"52", ptr addrspace(5) %"48", align 8 %"55" = getelementptr inbounds i8, ptr addrspace(5) %"54", i64 0 - %"56" = load i64, ptr addrspace(5) %"48", align 4 - store i64 %"56", ptr addrspace(5) %"55", align 4 - %"39" = load i64, ptr addrspace(5) %"54", align 4 + %"56" = load i64, ptr addrspace(5) %"48", align 8 + store i64 %"56", ptr addrspace(5) %"55", align 8 + %"39" = load i64, ptr addrspace(5) %"54", align 8 %"40" = call [16 x i8] @foobar(i64 %"39") br label %"42" "42": ; preds = %"41" store [16 x i8] %"40", ptr addrspace(5) %"57", align 1 - %"58" = load i64, ptr addrspace(5) %"57", align 4 - store i64 %"58", ptr addrspace(5) %"49", align 4 - %"59" = load i64, ptr addrspace(5) %"47", align 4 - %"60" = load i64, ptr addrspace(5) %"49", align 4 + %"58" = load i64, ptr addrspace(5) %"57", align 8 + store i64 %"58", ptr addrspace(5) %"49", align 8 + %"59" = load i64, ptr addrspace(5) %"47", align 8 + %"60" = load i64, ptr addrspace(5) %"49", align 8 %"64" = inttoptr i64 %"59" to ptr - store i64 %"60", ptr %"64", align 4 + store i64 %"60", ptr %"64", align 8 ret void } diff --git a/ptx/src/test/ll/extern_shared.ll b/ptx/src/test/ll/extern_shared.ll index 19f7a7e..615b73f 100644 --- a/ptx/src/test/ll/extern_shared.ll +++ b/ptx/src/test/ll/extern_shared.ll @@ -10,22 +10,22 @@ define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"31", ptr br label %"30" "30": ; preds = %1 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"44" = inttoptr i64 %"39" to ptr addrspace(1) - %"38" = load i64, ptr addrspace(1) %"44", align 4 - store i64 %"38", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(5) %"35", align 4 - store i64 %"40", ptr addrspace(3) @shared_mem, align 4 - %"41" = load i64, ptr addrspace(3) @shared_mem, align 4 - store i64 %"41", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"38" = load i64, ptr addrspace(1) %"44", align 8 + store i64 %"38", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(5) %"35", align 8 + store i64 %"40", ptr addrspace(3) @shared_mem, align 8 + %"41" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"41", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"34", align 8 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"47" = inttoptr i64 %"42" to ptr addrspace(1) - store i64 %"43", ptr addrspace(1) %"47", align 4 + store i64 %"43", ptr addrspace(1) %"47", align 8 ret void } diff --git a/ptx/src/test/ll/extern_shared_call.ll b/ptx/src/test/ll/extern_shared_call.ll index 54e00ee..22cf174 100644 --- a/ptx/src/test/ll/extern_shared_call.ll +++ b/ptx/src/test/ll/extern_shared_call.ll @@ -8,13 +8,13 @@ define void @incr_shared_2_global() #0 { br label %"33" "33": ; preds = %1 - %"37" = load i64, ptr addrspace(3) @shared_mem, align 4 - store i64 %"37", ptr addrspace(5) %"36", align 4 - %"39" = load i64, ptr addrspace(5) %"36", align 4 + %"37" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"37", ptr addrspace(5) %"36", align 8 + %"39" = load i64, ptr addrspace(5) %"36", align 8 %"38" = add i64 %"39", 2 - store i64 %"38", ptr addrspace(5) %"36", align 4 - %"40" = load i64, ptr addrspace(5) %"36", align 4 - store i64 %"40", ptr addrspace(3) @shared_mem, align 4 + store i64 %"38", ptr addrspace(5) %"36", align 8 + %"40" = load i64, ptr addrspace(5) %"36", align 8 + store i64 %"40", ptr addrspace(3) @shared_mem, align 8 ret void } @@ -28,26 +28,26 @@ define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"41", br label %"34" "34": ; preds = %1 - %"46" = load i64, ptr addrspace(4) %"41", align 4 - store i64 %"46", ptr addrspace(5) %"43", align 4 - %"47" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"47", ptr addrspace(5) %"44", align 4 - %"49" = load i64, ptr addrspace(5) %"43", align 4 + %"46" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"46", ptr addrspace(5) %"43", align 8 + %"47" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"47", ptr addrspace(5) %"44", align 8 + %"49" = load i64, ptr addrspace(5) %"43", align 8 %"56" = inttoptr i64 %"49" to ptr addrspace(1) - %"48" = load i64, ptr addrspace(1) %"56", align 4 - store i64 %"48", ptr addrspace(5) %"45", align 4 - %"50" = load i64, ptr addrspace(5) %"45", align 4 - store i64 %"50", ptr addrspace(3) @shared_mem, align 4 + %"48" = load i64, ptr addrspace(1) %"56", align 8 + store i64 %"48", ptr addrspace(5) %"45", align 8 + %"50" = load i64, ptr addrspace(5) %"45", align 8 + store i64 %"50", ptr addrspace(3) @shared_mem, align 8 call void @incr_shared_2_global() br label %"35" "35": ; preds = %"34" - %"51" = load i64, ptr addrspace(3) @shared_mem, align 4 - store i64 %"51", ptr addrspace(5) %"45", align 4 - %"52" = load i64, ptr addrspace(5) %"44", align 4 - %"53" = load i64, ptr addrspace(5) %"45", align 4 + %"51" = load i64, ptr addrspace(3) @shared_mem, align 8 + store i64 %"51", ptr addrspace(5) %"45", align 8 + %"52" = load i64, ptr addrspace(5) %"44", align 8 + %"53" = load i64, ptr addrspace(5) %"45", align 8 %"59" = inttoptr i64 %"52" to ptr addrspace(1) - store i64 %"53", ptr addrspace(1) %"59", align 4 + store i64 %"53", ptr addrspace(1) %"59", align 8 ret void } diff --git a/ptx/src/test/ll/fma.ll b/ptx/src/test/ll/fma.ll index 184f902..e63d2a2 100644 --- a/ptx/src/test/ll/fma.ll +++ b/ptx/src/test/ll/fma.ll @@ -10,20 +10,20 @@ define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"36", ptr addrspace( br label %"35" "35": ; preds = %1 - %"43" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"43", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"44", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"43" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"43", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"44", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %"57" = inttoptr i64 %"46" to ptr %"45" = load float, ptr %"57", align 4 store float %"45", ptr addrspace(5) %"40", align 4 - %"47" = load i64, ptr addrspace(5) %"38", align 4 + %"47" = load i64, ptr addrspace(5) %"38", align 8 %"58" = inttoptr i64 %"47" to ptr %"32" = getelementptr inbounds i8, ptr %"58", i64 4 %"48" = load float, ptr %"32", align 4 store float %"48", ptr addrspace(5) %"41", align 4 - %"49" = load i64, ptr addrspace(5) %"38", align 4 + %"49" = load i64, ptr addrspace(5) %"38", align 8 %"59" = inttoptr i64 %"49" to ptr %"34" = getelementptr inbounds i8, ptr %"59", i64 8 %"50" = load float, ptr %"34", align 4 @@ -33,7 +33,7 @@ define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"36", ptr addrspace( %"54" = load float, ptr addrspace(5) %"42", align 4 %"51" = call float @llvm.fma.f32(float %"52", float %"53", float %"54") store float %"51", ptr addrspace(5) %"40", align 4 - %"55" = load i64, ptr addrspace(5) %"39", align 4 + %"55" = load i64, ptr addrspace(5) %"39", align 8 %"56" = load float, ptr addrspace(5) %"40", align 4 %"60" = inttoptr i64 %"55" to ptr store float %"56", ptr %"60", align 4 diff --git a/ptx/src/test/ll/fmax.ll b/ptx/src/test/ll/fmax.ll index ba640cf..79c39d2 100644 --- a/ptx/src/test/ll/fmax.ll +++ b/ptx/src/test/ll/fmax.ll @@ -11,16 +11,16 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"35", ptr addrspace br label %"34" "34": ; preds = %1 - %"43" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"43", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"44", ptr addrspace(5) %"38", align 4 - %"46" = load i64, ptr addrspace(5) %"37", align 4 + %"43" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"43", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"44", ptr addrspace(5) %"38", align 8 + %"46" = load i64, ptr addrspace(5) %"37", align 8 %"55" = inttoptr i64 %"46" to ptr %"54" = load i16, ptr %"55", align 2 %"45" = bitcast i16 %"54" to half store half %"45", ptr addrspace(5) %"39", align 2 - %"47" = load i64, ptr addrspace(5) %"37", align 4 + %"47" = load i64, ptr addrspace(5) %"37", align 8 %"56" = inttoptr i64 %"47" to ptr %"33" = getelementptr inbounds i8, ptr %"56", i64 2 %"57" = load i16, ptr %"33", align 2 @@ -30,7 +30,7 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"35", ptr addrspace %"51" = load half, ptr addrspace(5) %"39", align 2 %"49" = call half @llvm.maxnum.f16(half %"50", half %"51") store half %"49", ptr addrspace(5) %"41", align 2 - %"52" = load i64, ptr addrspace(5) %"38", align 4 + %"52" = load i64, ptr addrspace(5) %"38", align 8 %"53" = load half, ptr addrspace(5) %"41", align 2 %"58" = inttoptr i64 %"52" to ptr %"59" = bitcast half %"53" to i16 diff --git a/ptx/src/test/ll/global_array.ll b/ptx/src/test/ll/global_array.ll index d36c3d1..20f434b 100644 --- a/ptx/src/test/ll/global_array.ll +++ b/ptx/src/test/ll/global_array.ll @@ -10,14 +10,14 @@ define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"31", ptr a br label %"30" "30": ; preds = %1 - store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"43" = inttoptr i64 %"39" to ptr addrspace(1) %"38" = load i32, ptr addrspace(1) %"43", align 4 store i32 %"38", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(5) %"34", align 4 + %"40" = load i64, ptr addrspace(5) %"34", align 8 %"41" = load i32, ptr addrspace(5) %"35", align 4 %"44" = inttoptr i64 %"40" to ptr addrspace(1) store i32 %"41", ptr addrspace(1) %"44", align 4 diff --git a/ptx/src/test/ll/lanemask_lt.ll b/ptx/src/test/ll/lanemask_lt.ll index 0efae06..2c7090e 100644 --- a/ptx/src/test/ll/lanemask_lt.ll +++ b/ptx/src/test/ll/lanemask_lt.ll @@ -12,11 +12,11 @@ define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"36", ptr ad br label %"33" "33": ; preds = %1 - %"43" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"43", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"44", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"43" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"43", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"44", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %"56" = inttoptr i64 %"46" to ptr %"55" = load i32, ptr %"56", align 4 store i32 %"55", ptr addrspace(5) %"40", align 4 @@ -32,7 +32,7 @@ define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"36", ptr ad %"52" = load i32, ptr addrspace(5) %"42", align 4 %"60" = add i32 %"51", %"52" store i32 %"60", ptr addrspace(5) %"41", align 4 - %"53" = load i64, ptr addrspace(5) %"39", align 4 + %"53" = load i64, ptr addrspace(5) %"39", align 8 %"54" = load i32, ptr addrspace(5) %"41", align 4 %"63" = inttoptr i64 %"53" to ptr store i32 %"54", ptr %"63", align 4 diff --git a/ptx/src/test/ll/ld_st.ll b/ptx/src/test/ll/ld_st.ll index 016d5cf..16adde4 100644 --- a/ptx/src/test/ll/ld_st.ll +++ b/ptx/src/test/ll/ld_st.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"30", ptr addrspac br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"41" = inttoptr i64 %"38" to ptr - %"37" = load i64, ptr %"41", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 - %"40" = load i64, ptr addrspace(5) %"34", align 4 + %"37" = load i64, ptr %"41", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 + %"40" = load i64, ptr addrspace(5) %"34", align 8 %"42" = inttoptr i64 %"39" to ptr - store i64 %"40", ptr %"42", align 4 + store i64 %"40", ptr %"42", align 8 ret void } diff --git a/ptx/src/test/ll/ld_st_implicit.ll b/ptx/src/test/ll/ld_st_implicit.ll index e9095e9..c92ee53 100644 --- a/ptx/src/test/ll/ld_st_implicit.ll +++ b/ptx/src/test/ll/ld_st_implicit.ll @@ -8,19 +8,19 @@ define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"31", ptr br label %"30" "30": ; preds = %1 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - store i64 81985529216486895, ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + store i64 81985529216486895, ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"44" = inttoptr i64 %"40" to ptr addrspace(1) %"43" = load float, ptr addrspace(1) %"44", align 4 %2 = bitcast float %"43" to i32 %"39" = zext i32 %2 to i64 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"45" = inttoptr i64 %"41" to ptr addrspace(1) %3 = trunc i64 %"42" to i32 %"46" = bitcast i32 %3 to float diff --git a/ptx/src/test/ll/ld_st_offset.ll b/ptx/src/test/ll/ld_st_offset.ll index 25e68d6..d72346b 100644 --- a/ptx/src/test/ll/ld_st_offset.ll +++ b/ptx/src/test/ll/ld_st_offset.ll @@ -9,24 +9,24 @@ define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"35", ptr a br label %"34" "34": ; preds = %1 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(5) %"37", align 4 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(5) %"37", align 8 %"51" = inttoptr i64 %"44" to ptr %"43" = load i32, ptr %"51", align 4 store i32 %"43", ptr addrspace(5) %"39", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"52" = inttoptr i64 %"45" to ptr %"31" = getelementptr inbounds i8, ptr %"52", i64 4 %"46" = load i32, ptr %"31", align 4 store i32 %"46", ptr addrspace(5) %"40", align 4 - %"47" = load i64, ptr addrspace(5) %"38", align 4 + %"47" = load i64, ptr addrspace(5) %"38", align 8 %"48" = load i32, ptr addrspace(5) %"40", align 4 %"53" = inttoptr i64 %"47" to ptr store i32 %"48", ptr %"53", align 4 - %"49" = load i64, ptr addrspace(5) %"38", align 4 + %"49" = load i64, ptr addrspace(5) %"38", align 8 %"54" = inttoptr i64 %"49" to ptr %"33" = getelementptr inbounds i8, ptr %"54", i64 4 %"50" = load i32, ptr addrspace(5) %"39", align 4 diff --git a/ptx/src/test/ll/lg2.ll b/ptx/src/test/ll/lg2.ll index ed7de7a..ae0c03d 100644 --- a/ptx/src/test/ll/lg2.ll +++ b/ptx/src/test/ll/lg2.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load float, ptr %"43", align 4 store float %"37", ptr addrspace(5) %"34", align 4 %"40" = load float, ptr addrspace(5) %"34", align 4 %"39" = call float @llvm.amdgcn.log.f32(float %"40") store float %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load float, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store float %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/local_align.ll b/ptx/src/test/ll/local_align.ll index 70dac59..29e123f 100644 --- a/ptx/src/test/ll/local_align.ll +++ b/ptx/src/test/ll/local_align.ll @@ -9,18 +9,18 @@ define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"31", ptr ad br label %"30" "30": ; preds = %1 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"42" = inttoptr i64 %"39" to ptr - %"38" = load i64, ptr %"42", align 4 - store i64 %"38", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"35", align 4 + %"38" = load i64, ptr %"42", align 8 + store i64 %"38", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(5) %"34", align 8 + %"41" = load i64, ptr addrspace(5) %"35", align 8 %"43" = inttoptr i64 %"40" to ptr - store i64 %"41", ptr %"43", align 4 + store i64 %"41", ptr %"43", align 8 ret void } diff --git a/ptx/src/test/ll/mad_s32.ll b/ptx/src/test/ll/mad_s32.ll index f0512a6..36bb709 100644 --- a/ptx/src/test/ll/mad_s32.ll +++ b/ptx/src/test/ll/mad_s32.ll @@ -11,20 +11,20 @@ define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"37", ptr addrsp br label %"36" "36": ; preds = %1 - %"45" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"45", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(4) %"38", align 4 - store i64 %"46", ptr addrspace(5) %"40", align 4 - %"48" = load i64, ptr addrspace(5) %"39", align 4 + %"45" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"45", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"46", ptr addrspace(5) %"40", align 8 + %"48" = load i64, ptr addrspace(5) %"39", align 8 %"59" = inttoptr i64 %"48" to ptr %"47" = load i32, ptr %"59", align 4 store i32 %"47", ptr addrspace(5) %"42", align 4 - %"49" = load i64, ptr addrspace(5) %"39", align 4 + %"49" = load i64, ptr addrspace(5) %"39", align 8 %"60" = inttoptr i64 %"49" to ptr %"33" = getelementptr inbounds i8, ptr %"60", i64 4 %"50" = load i32, ptr %"33", align 4 store i32 %"50", ptr addrspace(5) %"43", align 4 - %"51" = load i64, ptr addrspace(5) %"39", align 4 + %"51" = load i64, ptr addrspace(5) %"39", align 8 %"61" = inttoptr i64 %"51" to ptr %"35" = getelementptr inbounds i8, ptr %"61", i64 8 %"52" = load i32, ptr %"35", align 4 @@ -35,7 +35,7 @@ define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"37", ptr addrsp %2 = mul i32 %"54", %"55" %"53" = add i32 %2, %"56" store i32 %"53", ptr addrspace(5) %"41", align 4 - %"57" = load i64, ptr addrspace(5) %"40", align 4 + %"57" = load i64, ptr addrspace(5) %"40", align 8 %"58" = load i32, ptr addrspace(5) %"41", align 4 %"62" = inttoptr i64 %"57" to ptr store i32 %"58", ptr %"62", align 4 diff --git a/ptx/src/test/ll/mad_wide.ll b/ptx/src/test/ll/mad_wide.ll index f44ab2f..176c905 100644 --- a/ptx/src/test/ll/mad_wide.ll +++ b/ptx/src/test/ll/mad_wide.ll @@ -11,36 +11,36 @@ define amdgpu_kernel void @mad_wide(ptr addrspace(4) byref(i64) %"37", ptr addrs br label %"36" "36": ; preds = %1 - %"45" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"45", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(4) %"38", align 4 - store i64 %"46", ptr addrspace(5) %"40", align 4 - %"48" = load i64, ptr addrspace(5) %"39", align 4 + %"45" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"45", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"46", ptr addrspace(5) %"40", align 8 + %"48" = load i64, ptr addrspace(5) %"39", align 8 %"59" = inttoptr i64 %"48" to ptr %"47" = load i32, ptr %"59", align 4 store i32 %"47", ptr addrspace(5) %"42", align 4 - %"49" = load i64, ptr addrspace(5) %"39", align 4 + %"49" = load i64, ptr addrspace(5) %"39", align 8 %"60" = inttoptr i64 %"49" to ptr %"33" = getelementptr inbounds i8, ptr %"60", i64 4 %"50" = load i32, ptr %"33", align 4 store i32 %"50", ptr addrspace(5) %"43", align 4 - %"51" = load i64, ptr addrspace(5) %"39", align 4 + %"51" = load i64, ptr addrspace(5) %"39", align 8 %"61" = inttoptr i64 %"51" to ptr %"35" = getelementptr inbounds i8, ptr %"61", i64 8 - %"52" = load i64, ptr %"35", align 4 - store i64 %"52", ptr addrspace(5) %"44", align 4 + %"52" = load i64, ptr %"35", align 8 + store i64 %"52", ptr addrspace(5) %"44", align 8 %"54" = load i32, ptr addrspace(5) %"42", align 4 %"55" = load i32, ptr addrspace(5) %"43", align 4 - %"56" = load i64, ptr addrspace(5) %"44", align 4 + %"56" = load i64, ptr addrspace(5) %"44", align 8 %2 = sext i32 %"54" to i64 %3 = sext i32 %"55" to i64 %4 = mul i64 %2, %3 %"53" = add i64 %4, %"56" - store i64 %"53", ptr addrspace(5) %"41", align 4 - %"57" = load i64, ptr addrspace(5) %"40", align 4 - %"58" = load i64, ptr addrspace(5) %"41", align 4 + store i64 %"53", ptr addrspace(5) %"41", align 8 + %"57" = load i64, ptr addrspace(5) %"40", align 8 + %"58" = load i64, ptr addrspace(5) %"41", align 8 %"62" = inttoptr i64 %"57" to ptr - store i64 %"58", ptr %"62", align 4 + store i64 %"58", ptr %"62", align 8 ret void } diff --git a/ptx/src/test/ll/malformed_label.ll b/ptx/src/test/ll/malformed_label.ll index c8bac71..c512a5a 100644 --- a/ptx/src/test/ll/malformed_label.ll +++ b/ptx/src/test/ll/malformed_label.ll @@ -9,24 +9,24 @@ define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"34", pt br label %"32" "32": ; preds = %1 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 br label %"10" "10": ; preds = %"32" - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"48" = inttoptr i64 %"43" to ptr - %"42" = load i64, ptr %"48", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"45" = load i64, ptr addrspace(5) %"38", align 4 + %"42" = load i64, ptr %"48", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"45" = load i64, ptr addrspace(5) %"38", align 8 %"44" = add i64 %"45", 1 - store i64 %"44", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(5) %"37", align 4 - %"47" = load i64, ptr addrspace(5) %"39", align 4 + store i64 %"44", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(5) %"37", align 8 + %"47" = load i64, ptr addrspace(5) %"39", align 8 %"49" = inttoptr i64 %"46" to ptr - store i64 %"47", ptr %"49", align 4 + store i64 %"47", ptr %"49", align 8 ret void } diff --git a/ptx/src/test/ll/max.ll b/ptx/src/test/ll/max.ll index ae1256b..a72f4d9 100644 --- a/ptx/src/test/ll/max.ll +++ b/ptx/src/test/ll/max.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"33", ptr addrspace( br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load i32, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"33", ptr addrspace( %"47" = load i32, ptr addrspace(5) %"38", align 4 %"45" = call i32 @llvm.smax.i32(i32 %"46", i32 %"47") store i32 %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load i32, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"52", align 4 diff --git a/ptx/src/test/ll/membar.ll b/ptx/src/test/ll/membar.ll index 7455f34..44f6cc0 100644 --- a/ptx/src/test/ll/membar.ll +++ b/ptx/src/test/ll/membar.ll @@ -8,16 +8,16 @@ define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"30", ptr addrspa br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"42" = inttoptr i64 %"38" to ptr %"41" = load i32, ptr %"42", align 4 store i32 %"41", ptr addrspace(5) %"34", align 4 fence seq_cst - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"40" = load i32, ptr addrspace(5) %"34", align 4 %"43" = inttoptr i64 %"39" to ptr store i32 %"40", ptr %"43", align 4 diff --git a/ptx/src/test/ll/min.ll b/ptx/src/test/ll/min.ll index bf38592..f434899 100644 --- a/ptx/src/test/ll/min.ll +++ b/ptx/src/test/ll/min.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"33", ptr addrspace( br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load i32, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"33", ptr addrspace( %"47" = load i32, ptr addrspace(5) %"38", align 4 %"45" = call i32 @llvm.smin.i32(i32 %"46", i32 %"47") store i32 %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load i32, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"52", align 4 diff --git a/ptx/src/test/ll/mov.ll b/ptx/src/test/ll/mov.ll index 5ecb1fb..d766ba2 100644 --- a/ptx/src/test/ll/mov.ll +++ b/ptx/src/test/ll/mov.ll @@ -9,20 +9,20 @@ define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"31", ptr addrspace( br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"45" = inttoptr i64 %"40" to ptr - %"39" = load i64, ptr %"45", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 - store i64 %"42", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"34", align 4 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + %"39" = load i64, ptr %"45", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 + store i64 %"42", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"34", align 8 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"46" = inttoptr i64 %"43" to ptr - store i64 %"44", ptr %"46", align 4 + store i64 %"44", ptr %"46", align 8 ret void } diff --git a/ptx/src/test/ll/mov_address.ll b/ptx/src/test/ll/mov_address.ll index ea6ce80..7c8dd91 100644 --- a/ptx/src/test/ll/mov_address.ll +++ b/ptx/src/test/ll/mov_address.ll @@ -8,7 +8,7 @@ define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"29", ptr ad "28": ; preds = %1 %"33" = ptrtoint ptr addrspace(5) %"10" to i64 - store i64 %"33", ptr addrspace(5) %"31", align 4 + store i64 %"33", ptr addrspace(5) %"31", align 8 ret void } diff --git a/ptx/src/test/ll/mul24_hi_s32.ll b/ptx/src/test/ll/mul24_hi_s32.ll index 20e32ed..920bcd2 100644 --- a/ptx/src/test/ll/mul24_hi_s32.ll +++ b/ptx/src/test/ll/mul24_hi_s32.ll @@ -10,11 +10,11 @@ define amdgpu_kernel void @mul24_hi_s32(ptr addrspace(4) byref(i64) %"32", ptr a br label %"31" "31": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"39", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"39" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"39", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"40", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"36", align 4 @@ -29,7 +29,7 @@ define amdgpu_kernel void @mul24_hi_s32(ptr addrspace(4) byref(i64) %"32", ptr a %5 = shl i32 %3, 16 %"45" = or i32 %4, %5 store i32 %"45", ptr addrspace(5) %"38", align 4 - %"48" = load i64, ptr addrspace(5) %"35", align 4 + %"48" = load i64, ptr addrspace(5) %"35", align 8 %"49" = load i32, ptr addrspace(5) %"38", align 4 %"51" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"51", align 4 diff --git a/ptx/src/test/ll/mul24_hi_u32.ll b/ptx/src/test/ll/mul24_hi_u32.ll index 427adb6..46462be 100644 --- a/ptx/src/test/ll/mul24_hi_u32.ll +++ b/ptx/src/test/ll/mul24_hi_u32.ll @@ -9,11 +9,11 @@ define amdgpu_kernel void @mul24_hi_u32(ptr addrspace(4) byref(i64) %"31", ptr a br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"46" = inttoptr i64 %"40" to ptr %"39" = load i32, ptr %"46", align 4 store i32 %"39", ptr addrspace(5) %"35", align 4 @@ -25,7 +25,7 @@ define amdgpu_kernel void @mul24_hi_u32(ptr addrspace(4) byref(i64) %"31", ptr a %5 = shl i32 %3, 16 %"41" = or i32 %4, %5 store i32 %"41", ptr addrspace(5) %"36", align 4 - %"44" = load i64, ptr addrspace(5) %"34", align 4 + %"44" = load i64, ptr addrspace(5) %"34", align 8 %"45" = load i32, ptr addrspace(5) %"36", align 4 %"47" = inttoptr i64 %"44" to ptr store i32 %"45", ptr %"47", align 4 diff --git a/ptx/src/test/ll/mul24_lo_s32.ll b/ptx/src/test/ll/mul24_lo_s32.ll index 06a8b3b..028323f 100644 --- a/ptx/src/test/ll/mul24_lo_s32.ll +++ b/ptx/src/test/ll/mul24_lo_s32.ll @@ -10,11 +10,11 @@ define amdgpu_kernel void @mul24_lo_s32(ptr addrspace(4) byref(i64) %"32", ptr a br label %"31" "31": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"39", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"39" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"39", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"40", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"36", align 4 @@ -25,7 +25,7 @@ define amdgpu_kernel void @mul24_lo_s32(ptr addrspace(4) byref(i64) %"32", ptr a %"47" = load i32, ptr addrspace(5) %"36", align 4 %"45" = call i32 @llvm.amdgcn.mul.i24(i32 %"46", i32 %"47") store i32 %"45", ptr addrspace(5) %"38", align 4 - %"48" = load i64, ptr addrspace(5) %"35", align 4 + %"48" = load i64, ptr addrspace(5) %"35", align 8 %"49" = load i32, ptr addrspace(5) %"38", align 4 %"51" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"51", align 4 diff --git a/ptx/src/test/ll/mul24_lo_u32.ll b/ptx/src/test/ll/mul24_lo_u32.ll index 47c26c4..dfe2984 100644 --- a/ptx/src/test/ll/mul24_lo_u32.ll +++ b/ptx/src/test/ll/mul24_lo_u32.ll @@ -9,11 +9,11 @@ define amdgpu_kernel void @mul24_lo_u32(ptr addrspace(4) byref(i64) %"31", ptr a br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"46" = inttoptr i64 %"40" to ptr %"39" = load i32, ptr %"46", align 4 store i32 %"39", ptr addrspace(5) %"35", align 4 @@ -21,7 +21,7 @@ define amdgpu_kernel void @mul24_lo_u32(ptr addrspace(4) byref(i64) %"31", ptr a %"43" = load i32, ptr addrspace(5) %"35", align 4 %"41" = call i32 @llvm.amdgcn.mul.u24(i32 %"42", i32 %"43") store i32 %"41", ptr addrspace(5) %"36", align 4 - %"44" = load i64, ptr addrspace(5) %"34", align 4 + %"44" = load i64, ptr addrspace(5) %"34", align 8 %"45" = load i32, ptr addrspace(5) %"36", align 4 %"47" = inttoptr i64 %"44" to ptr store i32 %"45", ptr %"47", align 4 diff --git a/ptx/src/test/ll/mul_ftz.ll b/ptx/src/test/ll/mul_ftz.ll index 60bfc6f..5f5b18b 100644 --- a/ptx/src/test/ll/mul_ftz.ll +++ b/ptx/src/test/ll/mul_ftz.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"33", ptr addrsp br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load float, ptr %"50", align 4 store float %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load float, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"33", ptr addrsp %"47" = load float, ptr addrspace(5) %"38", align 4 %"45" = fmul float %"46", %"47" store float %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load float, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store float %"49", ptr %"52", align 4 diff --git a/ptx/src/test/ll/mul_hi.ll b/ptx/src/test/ll/mul_hi.ll index 155d766..591a266 100644 --- a/ptx/src/test/ll/mul_hi.ll +++ b/ptx/src/test/ll/mul_hi.ll @@ -9,24 +9,24 @@ define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"32", ptr addrspa br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr - %"40" = load i64, ptr %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %2 = zext i64 %"43" to i128 %3 = mul i128 %2, 2 %4 = lshr i128 %3, 64 %"42" = trunc i128 %4 to i64 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"47" = inttoptr i64 %"44" to ptr - store i64 %"45", ptr %"47", align 4 + store i64 %"45", ptr %"47", align 8 ret void } diff --git a/ptx/src/test/ll/mul_lo.ll b/ptx/src/test/ll/mul_lo.ll index b1a96dd..0dce8d8 100644 --- a/ptx/src/test/ll/mul_lo.ll +++ b/ptx/src/test/ll/mul_lo.ll @@ -9,21 +9,21 @@ define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"32", ptr addrspa br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr - %"40" = load i64, ptr %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"42" = mul i64 %"43", 2 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"47" = inttoptr i64 %"44" to ptr - store i64 %"45", ptr %"47", align 4 + store i64 %"45", ptr %"47", align 8 ret void } diff --git a/ptx/src/test/ll/mul_non_ftz.ll b/ptx/src/test/ll/mul_non_ftz.ll index afdd691..9599d07 100644 --- a/ptx/src/test/ll/mul_non_ftz.ll +++ b/ptx/src/test/ll/mul_non_ftz.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"33", ptr ad br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load float, ptr %"50", align 4 store float %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load float, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"33", ptr ad %"47" = load float, ptr addrspace(5) %"38", align 4 %"45" = fmul float %"46", %"47" store float %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load float, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store float %"49", ptr %"52", align 4 diff --git a/ptx/src/test/ll/mul_wide.ll b/ptx/src/test/ll/mul_wide.ll index 7c37943..aedf98c 100644 --- a/ptx/src/test/ll/mul_wide.ll +++ b/ptx/src/test/ll/mul_wide.ll @@ -10,15 +10,15 @@ define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"34", ptr addrs br label %"33" "33": ; preds = %1 - %"41" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"41", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + %"41" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"41", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"52" = inttoptr i64 %"44" to ptr addrspace(1) %"43" = load i32, ptr addrspace(1) %"52", align 4 store i32 %"43", ptr addrspace(5) %"38", align 4 - %"45" = load i64, ptr addrspace(5) %"36", align 4 + %"45" = load i64, ptr addrspace(5) %"36", align 8 %"53" = inttoptr i64 %"45" to ptr addrspace(1) %"32" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 4 %"46" = load i32, ptr addrspace(1) %"32", align 4 @@ -28,11 +28,11 @@ define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"34", ptr addrs %2 = sext i32 %"48" to i64 %3 = sext i32 %"49" to i64 %"47" = mul i64 %2, %3 - store i64 %"47", ptr addrspace(5) %"40", align 4 - %"50" = load i64, ptr addrspace(5) %"37", align 4 - %"51" = load i64, ptr addrspace(5) %"40", align 4 + store i64 %"47", ptr addrspace(5) %"40", align 8 + %"50" = load i64, ptr addrspace(5) %"37", align 8 + %"51" = load i64, ptr addrspace(5) %"40", align 8 %"54" = inttoptr i64 %"50" to ptr - store i64 %"51", ptr %"54", align 4 + store i64 %"51", ptr %"54", align 8 ret void } diff --git a/ptx/src/test/ll/multiple_return.ll b/ptx/src/test/ll/multiple_return.ll index 9ec20c7..42b98b8 100644 --- a/ptx/src/test/ll/multiple_return.ll +++ b/ptx/src/test/ll/multiple_return.ll @@ -30,11 +30,11 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"50", pt br label %"44" "44": ; preds = %1 - %"57" = load i64, ptr addrspace(4) %"50", align 4 - store i64 %"57", ptr addrspace(5) %"52", align 4 - %"58" = load i64, ptr addrspace(4) %"51", align 4 - store i64 %"58", ptr addrspace(5) %"53", align 4 - %"60" = load i64, ptr addrspace(5) %"52", align 4 + %"57" = load i64, ptr addrspace(4) %"50", align 8 + store i64 %"57", ptr addrspace(5) %"52", align 8 + %"58" = load i64, ptr addrspace(4) %"51", align 8 + store i64 %"58", ptr addrspace(5) %"53", align 8 + %"60" = load i64, ptr addrspace(5) %"52", align 8 %"68" = inttoptr i64 %"60" to ptr %"59" = load i32, ptr %"68", align 4 store i32 %"59", ptr addrspace(5) %"54", align 4 @@ -48,7 +48,7 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"50", pt br label %"45" "45": ; preds = %"44" - %"64" = load i64, ptr addrspace(5) %"53", align 4 + %"64" = load i64, ptr addrspace(5) %"53", align 8 %"65" = load i32, ptr addrspace(5) %"55", align 4 %"69" = inttoptr i64 %"64" to ptr store i32 %"65", ptr %"69", align 4 @@ -56,7 +56,7 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"50", pt br i1 %"66", label %"19", label %"20" "19": ; preds = %"45" - %"67" = load i64, ptr addrspace(5) %"53", align 4 + %"67" = load i64, ptr addrspace(5) %"53", align 8 %"70" = inttoptr i64 %"67" to ptr %"41" = getelementptr inbounds i8, ptr %"70", i64 4 store i32 123, ptr %"41", align 4 diff --git a/ptx/src/test/ll/neg.ll b/ptx/src/test/ll/neg.ll index d0992e7..aaa6890 100644 --- a/ptx/src/test/ll/neg.ll +++ b/ptx/src/test/ll/neg.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load i32, ptr %"43", align 4 store i32 %"37", ptr addrspace(5) %"34", align 4 %"40" = load i32, ptr addrspace(5) %"34", align 4 %"39" = sub i32 0, %"40" store i32 %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load i32, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store i32 %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/non_scalar_ptr_offset.ll b/ptx/src/test/ll/non_scalar_ptr_offset.ll index a86e8ff..07df459 100644 --- a/ptx/src/test/ll/non_scalar_ptr_offset.ll +++ b/ptx/src/test/ll/non_scalar_ptr_offset.ll @@ -9,11 +9,11 @@ define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"3 br label %"33" "33": ; preds = %1 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"42" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"42" = load i64, ptr addrspace(5) %"36", align 8 %"50" = inttoptr i64 %"42" to ptr addrspace(1) %"32" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 8 %"30" = load <2 x i32>, ptr addrspace(1) %"32", align 8 @@ -25,7 +25,7 @@ define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"3 %"47" = load i32, ptr addrspace(5) %"39", align 4 %"45" = add i32 %"46", %"47" store i32 %"45", ptr addrspace(5) %"38", align 4 - %"48" = load i64, ptr addrspace(5) %"37", align 4 + %"48" = load i64, ptr addrspace(5) %"37", align 8 %"49" = load i32, ptr addrspace(5) %"38", align 4 %"51" = inttoptr i64 %"48" to ptr addrspace(1) store i32 %"49", ptr addrspace(1) %"51", align 4 diff --git a/ptx/src/test/ll/not.ll b/ptx/src/test/ll/not.ll index efb1f95..cc7c4a2 100644 --- a/ptx/src/test/ll/not.ll +++ b/ptx/src/test/ll/not.ll @@ -9,21 +9,21 @@ define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"31", ptr addrspace( br label %"30" "30": ; preds = %1 - %"37" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"37", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(5) %"33", align 4 + %"37" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"37", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(5) %"33", align 8 %"45" = inttoptr i64 %"40" to ptr - %"39" = load i64, ptr %"45", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr %"45", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"46" = xor i64 %"42", -1 - store i64 %"46", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"34", align 4 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + store i64 %"46", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"34", align 8 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"48" = inttoptr i64 %"43" to ptr - store i64 %"44", ptr %"48", align 4 + store i64 %"44", ptr %"48", align 8 ret void } diff --git a/ptx/src/test/ll/ntid.ll b/ptx/src/test/ll/ntid.ll index 87185bc..f4a7d5c 100644 --- a/ptx/src/test/ll/ntid.ll +++ b/ptx/src/test/ll/ntid.ll @@ -11,11 +11,11 @@ define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"35", ptr addrspace br label %"32" "32": ; preds = %1 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"42" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"42", ptr addrspace(5) %"38", align 4 - %"44" = load i64, ptr addrspace(5) %"37", align 4 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"42" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"42", ptr addrspace(5) %"38", align 8 + %"44" = load i64, ptr addrspace(5) %"37", align 8 %"51" = inttoptr i64 %"44" to ptr %"43" = load i32, ptr %"51", align 4 store i32 %"43", ptr addrspace(5) %"39", align 4 @@ -28,7 +28,7 @@ define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"35", ptr addrspace %"48" = load i32, ptr addrspace(5) %"40", align 4 %"46" = add i32 %"47", %"48" store i32 %"46", ptr addrspace(5) %"39", align 4 - %"49" = load i64, ptr addrspace(5) %"38", align 4 + %"49" = load i64, ptr addrspace(5) %"38", align 8 %"50" = load i32, ptr addrspace(5) %"39", align 4 %"52" = inttoptr i64 %"49" to ptr store i32 %"50", ptr %"52", align 4 diff --git a/ptx/src/test/ll/or.ll b/ptx/src/test/ll/or.ll index e773120..1efa31a 100644 --- a/ptx/src/test/ll/or.ll +++ b/ptx/src/test/ll/or.ll @@ -9,27 +9,27 @@ define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4 br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr - %"41" = load i64, ptr %"50", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"41" = load i64, ptr %"50", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 8 - %"44" = load i64, ptr %"31", align 4 - store i64 %"44", ptr addrspace(5) %"38", align 4 - %"46" = load i64, ptr addrspace(5) %"37", align 4 - %"47" = load i64, ptr addrspace(5) %"38", align 4 + %"44" = load i64, ptr %"31", align 8 + store i64 %"44", ptr addrspace(5) %"38", align 8 + %"46" = load i64, ptr addrspace(5) %"37", align 8 + %"47" = load i64, ptr addrspace(5) %"38", align 8 %"52" = or i64 %"46", %"47" - store i64 %"52", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 - %"49" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"52", ptr addrspace(5) %"37", align 8 + %"48" = load i64, ptr addrspace(5) %"36", align 8 + %"49" = load i64, ptr addrspace(5) %"37", align 8 %"55" = inttoptr i64 %"48" to ptr - store i64 %"49", ptr %"55", align 4 + store i64 %"49", ptr %"55", align 8 ret void } diff --git a/ptx/src/test/ll/popc.ll b/ptx/src/test/ll/popc.ll index 0b379c5..6cbb49b 100644 --- a/ptx/src/test/ll/popc.ll +++ b/ptx/src/test/ll/popc.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"30", ptr addrspace br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load i32, ptr %"43", align 4 store i32 %"37", ptr addrspace(5) %"34", align 4 %"40" = load i32, ptr addrspace(5) %"34", align 4 %"44" = call i32 @llvm.ctpop.i32(i32 %"40") store i32 %"44", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load i32, ptr addrspace(5) %"34", align 4 %"45" = inttoptr i64 %"41" to ptr store i32 %"42", ptr %"45", align 4 diff --git a/ptx/src/test/ll/pred_not.ll b/ptx/src/test/ll/pred_not.ll index 65cc659..94c64f9 100644 --- a/ptx/src/test/ll/pred_not.ll +++ b/ptx/src/test/ll/pred_not.ll @@ -11,21 +11,21 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrs br label %"40" "40": ; preds = %1 - %"49" = load i64, ptr addrspace(4) %"41", align 4 - store i64 %"49", ptr addrspace(5) %"43", align 4 - %"50" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"50", ptr addrspace(5) %"44", align 4 - %"52" = load i64, ptr addrspace(5) %"43", align 4 + %"49" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"49", ptr addrspace(5) %"43", align 8 + %"50" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"50", ptr addrspace(5) %"44", align 8 + %"52" = load i64, ptr addrspace(5) %"43", align 8 %"66" = inttoptr i64 %"52" to ptr - %"51" = load i64, ptr %"66", align 4 - store i64 %"51", ptr addrspace(5) %"45", align 4 - %"53" = load i64, ptr addrspace(5) %"43", align 4 + %"51" = load i64, ptr %"66", align 8 + store i64 %"51", ptr addrspace(5) %"45", align 8 + %"53" = load i64, ptr addrspace(5) %"43", align 8 %"67" = inttoptr i64 %"53" to ptr %"37" = getelementptr inbounds i8, ptr %"67", i64 8 - %"54" = load i64, ptr %"37", align 4 - store i64 %"54", ptr addrspace(5) %"46", align 4 - %"56" = load i64, ptr addrspace(5) %"45", align 4 - %"57" = load i64, ptr addrspace(5) %"46", align 4 + %"54" = load i64, ptr %"37", align 8 + store i64 %"54", ptr addrspace(5) %"46", align 8 + %"56" = load i64, ptr addrspace(5) %"45", align 8 + %"57" = load i64, ptr addrspace(5) %"46", align 8 %"55" = icmp ult i64 %"56", %"57" store i1 %"55", ptr addrspace(5) %"48", align 1 %"59" = load i1, ptr addrspace(5) %"48", align 1 @@ -35,7 +35,7 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrs br i1 %"60", label %"16", label %"17" "16": ; preds = %"40" - store i64 1, ptr addrspace(5) %"47", align 4 + store i64 1, ptr addrspace(5) %"47", align 8 br label %"17" "17": ; preds = %"16", %"40" @@ -43,14 +43,14 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrs br i1 %"62", label %"19", label %"18" "18": ; preds = %"17" - store i64 2, ptr addrspace(5) %"47", align 4 + store i64 2, ptr addrspace(5) %"47", align 8 br label %"19" "19": ; preds = %"18", %"17" - %"64" = load i64, ptr addrspace(5) %"44", align 4 - %"65" = load i64, ptr addrspace(5) %"47", align 4 + %"64" = load i64, ptr addrspace(5) %"44", align 8 + %"65" = load i64, ptr addrspace(5) %"47", align 8 %"68" = inttoptr i64 %"64" to ptr - store i64 %"65", ptr %"68", align 4 + store i64 %"65", ptr %"68", align 8 ret void } diff --git a/ptx/src/test/ll/prmt.ll b/ptx/src/test/ll/prmt.ll index 85f144e..25c9aa1 100644 --- a/ptx/src/test/ll/prmt.ll +++ b/ptx/src/test/ll/prmt.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"33", ptr addrspace br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load i32, ptr %"31", align 4 @@ -28,7 +28,7 @@ define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"33", ptr addrspace %3 = bitcast i32 %"47" to <4 x i8> %"52" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> store <4 x i8> %"52", ptr addrspace(5) %"38", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load i32, ptr addrspace(5) %"38", align 4 %"55" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"55", align 4 diff --git a/ptx/src/test/ll/rcp.ll b/ptx/src/test/ll/rcp.ll index 0995cc0..cdc6ffc 100644 --- a/ptx/src/test/ll/rcp.ll +++ b/ptx/src/test/ll/rcp.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load float, ptr %"43", align 4 store float %"37", ptr addrspace(5) %"34", align 4 %"40" = load float, ptr addrspace(5) %"34", align 4 %"39" = call float @llvm.amdgcn.rcp.f32(float %"40") store float %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load float, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store float %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/reg_local.ll b/ptx/src/test/ll/reg_local.ll index a1b6bf2..1a5d4f0 100644 --- a/ptx/src/test/ll/reg_local.ll +++ b/ptx/src/test/ll/reg_local.ll @@ -9,27 +9,27 @@ define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"37", ptr addr br label %"36" "36": ; preds = %1 - %"42" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"42", ptr addrspace(5) %"39", align 4 - %"43" = load i64, ptr addrspace(4) %"38", align 4 - store i64 %"43", ptr addrspace(5) %"40", align 4 - %"45" = load i64, ptr addrspace(5) %"39", align 4 + %"42" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"42", ptr addrspace(5) %"39", align 8 + %"43" = load i64, ptr addrspace(4) %"38", align 8 + store i64 %"43", ptr addrspace(5) %"40", align 8 + %"45" = load i64, ptr addrspace(5) %"39", align 8 %"51" = inttoptr i64 %"45" to ptr addrspace(1) - %"50" = load i64, ptr addrspace(1) %"51", align 4 - store i64 %"50", ptr addrspace(5) %"41", align 4 - %"46" = load i64, ptr addrspace(5) %"41", align 4 + %"50" = load i64, ptr addrspace(1) %"51", align 8 + store i64 %"50", ptr addrspace(5) %"41", align 8 + %"46" = load i64, ptr addrspace(5) %"41", align 8 %"31" = add i64 %"46", 1 %"52" = addrspacecast ptr addrspace(5) %"10" to ptr - store i64 %"31", ptr %"52", align 4 + store i64 %"31", ptr %"52", align 8 %"54" = addrspacecast ptr addrspace(5) %"10" to ptr %"33" = getelementptr inbounds i8, ptr %"54", i64 0 - %"55" = load i64, ptr %"33", align 4 - store i64 %"55", ptr addrspace(5) %"41", align 4 - %"48" = load i64, ptr addrspace(5) %"40", align 4 + %"55" = load i64, ptr %"33", align 8 + store i64 %"55", ptr addrspace(5) %"41", align 8 + %"48" = load i64, ptr addrspace(5) %"40", align 8 %"56" = inttoptr i64 %"48" to ptr addrspace(1) %"35" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 0 - %"49" = load i64, ptr addrspace(5) %"41", align 4 - store i64 %"49", ptr addrspace(1) %"35", align 4 + %"49" = load i64, ptr addrspace(5) %"41", align 8 + store i64 %"49", ptr addrspace(1) %"35", align 8 ret void } diff --git a/ptx/src/test/ll/rem.ll b/ptx/src/test/ll/rem.ll index dd33785..684f3b1 100644 --- a/ptx/src/test/ll/rem.ll +++ b/ptx/src/test/ll/rem.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"33", ptr addrspace( br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load i32, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"33", ptr addrspace( %"47" = load i32, ptr addrspace(5) %"38", align 4 %"45" = srem i32 %"46", %"47" store i32 %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load i32, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"52", align 4 diff --git a/ptx/src/test/ll/rsqrt.ll b/ptx/src/test/ll/rsqrt.ll index 04ca3e5..8b508b3 100644 --- a/ptx/src/test/ll/rsqrt.ll +++ b/ptx/src/test/ll/rsqrt.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"30", ptr addrspac br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load double, ptr %"43", align 8 store double %"37", ptr addrspace(5) %"34", align 8 %"40" = load double, ptr addrspace(5) %"34", align 8 %"39" = call double @llvm.amdgcn.rsq.f64(double %"40") store double %"39", ptr addrspace(5) %"34", align 8 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load double, ptr addrspace(5) %"34", align 8 %"44" = inttoptr i64 %"41" to ptr store double %"42", ptr %"44", align 8 diff --git a/ptx/src/test/ll/selp.ll b/ptx/src/test/ll/selp.ll index 918c4df..98d1166 100644 --- a/ptx/src/test/ll/selp.ll +++ b/ptx/src/test/ll/selp.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"34", ptr addrspace br label %"33" "33": ; preds = %1 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"51" = inttoptr i64 %"43" to ptr %"42" = load i16, ptr %"51", align 2 store i16 %"42", ptr addrspace(5) %"38", align 2 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"52" = inttoptr i64 %"44" to ptr %"31" = getelementptr inbounds i8, ptr %"52", i64 2 %"45" = load i16, ptr %"31", align 2 @@ -26,7 +26,7 @@ define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"34", ptr addrspace %"48" = load i16, ptr addrspace(5) %"39", align 2 %"46" = select i1 false, i16 %"47", i16 %"48" store i16 %"46", ptr addrspace(5) %"38", align 2 - %"49" = load i64, ptr addrspace(5) %"37", align 4 + %"49" = load i64, ptr addrspace(5) %"37", align 8 %"50" = load i16, ptr addrspace(5) %"38", align 2 %"53" = inttoptr i64 %"49" to ptr store i16 %"50", ptr %"53", align 2 diff --git a/ptx/src/test/ll/selp_true.ll b/ptx/src/test/ll/selp_true.ll index a422f89..e2f506d 100644 --- a/ptx/src/test/ll/selp_true.ll +++ b/ptx/src/test/ll/selp_true.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"34", ptr addr br label %"33" "33": ; preds = %1 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"41" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"41" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"41", ptr addrspace(5) %"37", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"51" = inttoptr i64 %"43" to ptr %"42" = load i16, ptr %"51", align 2 store i16 %"42", ptr addrspace(5) %"38", align 2 - %"44" = load i64, ptr addrspace(5) %"36", align 4 + %"44" = load i64, ptr addrspace(5) %"36", align 8 %"52" = inttoptr i64 %"44" to ptr %"31" = getelementptr inbounds i8, ptr %"52", i64 2 %"45" = load i16, ptr %"31", align 2 @@ -26,7 +26,7 @@ define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"34", ptr addr %"48" = load i16, ptr addrspace(5) %"39", align 2 %"46" = select i1 true, i16 %"47", i16 %"48" store i16 %"46", ptr addrspace(5) %"38", align 2 - %"49" = load i64, ptr addrspace(5) %"37", align 4 + %"49" = load i64, ptr addrspace(5) %"37", align 8 %"50" = load i16, ptr addrspace(5) %"38", align 2 %"53" = inttoptr i64 %"49" to ptr store i16 %"50", ptr %"53", align 2 diff --git a/ptx/src/test/ll/setp.ll b/ptx/src/test/ll/setp.ll index d0617b8..0ed02d7 100644 --- a/ptx/src/test/ll/setp.ll +++ b/ptx/src/test/ll/setp.ll @@ -11,28 +11,28 @@ define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"41", ptr addrspace br label %"40" "40": ; preds = %1 - %"49" = load i64, ptr addrspace(4) %"41", align 4 - store i64 %"49", ptr addrspace(5) %"43", align 4 - %"50" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"50", ptr addrspace(5) %"44", align 4 - %"52" = load i64, ptr addrspace(5) %"43", align 4 + %"49" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"49", ptr addrspace(5) %"43", align 8 + %"50" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"50", ptr addrspace(5) %"44", align 8 + %"52" = load i64, ptr addrspace(5) %"43", align 8 %"64" = inttoptr i64 %"52" to ptr - %"51" = load i64, ptr %"64", align 4 - store i64 %"51", ptr addrspace(5) %"45", align 4 - %"53" = load i64, ptr addrspace(5) %"43", align 4 + %"51" = load i64, ptr %"64", align 8 + store i64 %"51", ptr addrspace(5) %"45", align 8 + %"53" = load i64, ptr addrspace(5) %"43", align 8 %"65" = inttoptr i64 %"53" to ptr %"37" = getelementptr inbounds i8, ptr %"65", i64 8 - %"54" = load i64, ptr %"37", align 4 - store i64 %"54", ptr addrspace(5) %"46", align 4 - %"56" = load i64, ptr addrspace(5) %"45", align 4 - %"57" = load i64, ptr addrspace(5) %"46", align 4 + %"54" = load i64, ptr %"37", align 8 + store i64 %"54", ptr addrspace(5) %"46", align 8 + %"56" = load i64, ptr addrspace(5) %"45", align 8 + %"57" = load i64, ptr addrspace(5) %"46", align 8 %"55" = icmp ult i64 %"56", %"57" store i1 %"55", ptr addrspace(5) %"48", align 1 %"58" = load i1, ptr addrspace(5) %"48", align 1 br i1 %"58", label %"16", label %"17" "16": ; preds = %"40" - store i64 1, ptr addrspace(5) %"47", align 4 + store i64 1, ptr addrspace(5) %"47", align 8 br label %"17" "17": ; preds = %"16", %"40" @@ -40,14 +40,14 @@ define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"41", ptr addrspace br i1 %"60", label %"19", label %"18" "18": ; preds = %"17" - store i64 2, ptr addrspace(5) %"47", align 4 + store i64 2, ptr addrspace(5) %"47", align 8 br label %"19" "19": ; preds = %"18", %"17" - %"62" = load i64, ptr addrspace(5) %"44", align 4 - %"63" = load i64, ptr addrspace(5) %"47", align 4 + %"62" = load i64, ptr addrspace(5) %"44", align 8 + %"63" = load i64, ptr addrspace(5) %"47", align 8 %"66" = inttoptr i64 %"62" to ptr - store i64 %"63", ptr %"66", align 4 + store i64 %"63", ptr %"66", align 8 ret void } diff --git a/ptx/src/test/ll/setp_gt.ll b/ptx/src/test/ll/setp_gt.ll index c02b59e..7b18301 100644 --- a/ptx/src/test/ll/setp_gt.ll +++ b/ptx/src/test/ll/setp_gt.ll @@ -11,15 +11,15 @@ define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"39", ptr addrsp br label %"38" "38": ; preds = %1 - %"47" = load i64, ptr addrspace(4) %"39", align 4 - store i64 %"47", ptr addrspace(5) %"41", align 4 - %"48" = load i64, ptr addrspace(4) %"40", align 4 - store i64 %"48", ptr addrspace(5) %"42", align 4 - %"50" = load i64, ptr addrspace(5) %"41", align 4 + %"47" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"47", ptr addrspace(5) %"41", align 8 + %"48" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"48", ptr addrspace(5) %"42", align 8 + %"50" = load i64, ptr addrspace(5) %"41", align 8 %"64" = inttoptr i64 %"50" to ptr %"49" = load float, ptr %"64", align 4 store float %"49", ptr addrspace(5) %"43", align 4 - %"51" = load i64, ptr addrspace(5) %"41", align 4 + %"51" = load i64, ptr addrspace(5) %"41", align 8 %"65" = inttoptr i64 %"51" to ptr %"37" = getelementptr inbounds i8, ptr %"65", i64 4 %"52" = load float, ptr %"37", align 4 @@ -46,7 +46,7 @@ define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"39", ptr addrsp br label %"19" "19": ; preds = %"18", %"17" - %"62" = load i64, ptr addrspace(5) %"42", align 4 + %"62" = load i64, ptr addrspace(5) %"42", align 8 %"63" = load float, ptr addrspace(5) %"45", align 4 %"66" = inttoptr i64 %"62" to ptr store float %"63", ptr %"66", align 4 diff --git a/ptx/src/test/ll/setp_leu.ll b/ptx/src/test/ll/setp_leu.ll index 5d19314..21f8d26 100644 --- a/ptx/src/test/ll/setp_leu.ll +++ b/ptx/src/test/ll/setp_leu.ll @@ -11,15 +11,15 @@ define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"39", ptr addrs br label %"38" "38": ; preds = %1 - %"47" = load i64, ptr addrspace(4) %"39", align 4 - store i64 %"47", ptr addrspace(5) %"41", align 4 - %"48" = load i64, ptr addrspace(4) %"40", align 4 - store i64 %"48", ptr addrspace(5) %"42", align 4 - %"50" = load i64, ptr addrspace(5) %"41", align 4 + %"47" = load i64, ptr addrspace(4) %"39", align 8 + store i64 %"47", ptr addrspace(5) %"41", align 8 + %"48" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"48", ptr addrspace(5) %"42", align 8 + %"50" = load i64, ptr addrspace(5) %"41", align 8 %"64" = inttoptr i64 %"50" to ptr %"49" = load float, ptr %"64", align 4 store float %"49", ptr addrspace(5) %"43", align 4 - %"51" = load i64, ptr addrspace(5) %"41", align 4 + %"51" = load i64, ptr addrspace(5) %"41", align 8 %"65" = inttoptr i64 %"51" to ptr %"37" = getelementptr inbounds i8, ptr %"65", i64 4 %"52" = load float, ptr %"37", align 4 @@ -46,7 +46,7 @@ define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"39", ptr addrs br label %"19" "19": ; preds = %"18", %"17" - %"62" = load i64, ptr addrspace(5) %"42", align 4 + %"62" = load i64, ptr addrspace(5) %"42", align 8 %"63" = load float, ptr addrspace(5) %"45", align 4 %"66" = inttoptr i64 %"62" to ptr store float %"63", ptr %"66", align 4 diff --git a/ptx/src/test/ll/setp_nan.ll b/ptx/src/test/ll/setp_nan.ll index ca1e98b..6910977 100644 --- a/ptx/src/test/ll/setp_nan.ll +++ b/ptx/src/test/ll/setp_nan.ll @@ -17,45 +17,45 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"82" "82": ; preds = %1 - %"97" = load i64, ptr addrspace(4) %"83", align 4 - store i64 %"97", ptr addrspace(5) %"85", align 4 - %"98" = load i64, ptr addrspace(4) %"84", align 4 - store i64 %"98", ptr addrspace(5) %"86", align 4 - %"100" = load i64, ptr addrspace(5) %"85", align 4 + %"97" = load i64, ptr addrspace(4) %"83", align 8 + store i64 %"97", ptr addrspace(5) %"85", align 8 + %"98" = load i64, ptr addrspace(4) %"84", align 8 + store i64 %"98", ptr addrspace(5) %"86", align 8 + %"100" = load i64, ptr addrspace(5) %"85", align 8 %"151" = inttoptr i64 %"100" to ptr %"99" = load float, ptr %"151", align 4 store float %"99", ptr addrspace(5) %"87", align 4 - %"101" = load i64, ptr addrspace(5) %"85", align 4 + %"101" = load i64, ptr addrspace(5) %"85", align 8 %"152" = inttoptr i64 %"101" to ptr %"55" = getelementptr inbounds i8, ptr %"152", i64 4 %"102" = load float, ptr %"55", align 4 store float %"102", ptr addrspace(5) %"88", align 4 - %"103" = load i64, ptr addrspace(5) %"85", align 4 + %"103" = load i64, ptr addrspace(5) %"85", align 8 %"153" = inttoptr i64 %"103" to ptr %"57" = getelementptr inbounds i8, ptr %"153", i64 8 %"104" = load float, ptr %"57", align 4 store float %"104", ptr addrspace(5) %"89", align 4 - %"105" = load i64, ptr addrspace(5) %"85", align 4 + %"105" = load i64, ptr addrspace(5) %"85", align 8 %"154" = inttoptr i64 %"105" to ptr %"59" = getelementptr inbounds i8, ptr %"154", i64 12 %"106" = load float, ptr %"59", align 4 store float %"106", ptr addrspace(5) %"90", align 4 - %"107" = load i64, ptr addrspace(5) %"85", align 4 + %"107" = load i64, ptr addrspace(5) %"85", align 8 %"155" = inttoptr i64 %"107" to ptr %"61" = getelementptr inbounds i8, ptr %"155", i64 16 %"108" = load float, ptr %"61", align 4 store float %"108", ptr addrspace(5) %"91", align 4 - %"109" = load i64, ptr addrspace(5) %"85", align 4 + %"109" = load i64, ptr addrspace(5) %"85", align 8 %"156" = inttoptr i64 %"109" to ptr %"63" = getelementptr inbounds i8, ptr %"156", i64 20 %"110" = load float, ptr %"63", align 4 store float %"110", ptr addrspace(5) %"92", align 4 - %"111" = load i64, ptr addrspace(5) %"85", align 4 + %"111" = load i64, ptr addrspace(5) %"85", align 8 %"157" = inttoptr i64 %"111" to ptr %"65" = getelementptr inbounds i8, ptr %"157", i64 24 %"112" = load float, ptr %"65", align 4 store float %"112", ptr addrspace(5) %"93", align 4 - %"113" = load i64, ptr addrspace(5) %"85", align 4 + %"113" = load i64, ptr addrspace(5) %"85", align 8 %"158" = inttoptr i64 %"113" to ptr %"67" = getelementptr inbounds i8, ptr %"158", i64 28 %"114" = load float, ptr %"67", align 4 @@ -80,7 +80,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"25" "25": ; preds = %"24", %"23" - %"122" = load i64, ptr addrspace(5) %"86", align 4 + %"122" = load i64, ptr addrspace(5) %"86", align 8 %"123" = load i32, ptr addrspace(5) %"95", align 4 %"159" = inttoptr i64 %"122" to ptr store i32 %"123", ptr %"159", align 4 @@ -104,7 +104,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"29" "29": ; preds = %"28", %"27" - %"131" = load i64, ptr addrspace(5) %"86", align 4 + %"131" = load i64, ptr addrspace(5) %"86", align 8 %"160" = inttoptr i64 %"131" to ptr %"73" = getelementptr inbounds i8, ptr %"160", i64 4 %"132" = load i32, ptr addrspace(5) %"95", align 4 @@ -129,7 +129,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"33" "33": ; preds = %"32", %"31" - %"140" = load i64, ptr addrspace(5) %"86", align 4 + %"140" = load i64, ptr addrspace(5) %"86", align 8 %"161" = inttoptr i64 %"140" to ptr %"77" = getelementptr inbounds i8, ptr %"161", i64 8 %"141" = load i32, ptr addrspace(5) %"95", align 4 @@ -154,7 +154,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"37" "37": ; preds = %"36", %"35" - %"149" = load i64, ptr addrspace(5) %"86", align 4 + %"149" = load i64, ptr addrspace(5) %"86", align 8 %"162" = inttoptr i64 %"149" to ptr %"81" = getelementptr inbounds i8, ptr %"162", i64 12 %"150" = load i32, ptr addrspace(5) %"95", align 4 diff --git a/ptx/src/test/ll/setp_num.ll b/ptx/src/test/ll/setp_num.ll index 4a6d56f..834ade6 100644 --- a/ptx/src/test/ll/setp_num.ll +++ b/ptx/src/test/ll/setp_num.ll @@ -17,45 +17,45 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"82" "82": ; preds = %1 - %"97" = load i64, ptr addrspace(4) %"83", align 4 - store i64 %"97", ptr addrspace(5) %"85", align 4 - %"98" = load i64, ptr addrspace(4) %"84", align 4 - store i64 %"98", ptr addrspace(5) %"86", align 4 - %"100" = load i64, ptr addrspace(5) %"85", align 4 + %"97" = load i64, ptr addrspace(4) %"83", align 8 + store i64 %"97", ptr addrspace(5) %"85", align 8 + %"98" = load i64, ptr addrspace(4) %"84", align 8 + store i64 %"98", ptr addrspace(5) %"86", align 8 + %"100" = load i64, ptr addrspace(5) %"85", align 8 %"151" = inttoptr i64 %"100" to ptr %"99" = load float, ptr %"151", align 4 store float %"99", ptr addrspace(5) %"87", align 4 - %"101" = load i64, ptr addrspace(5) %"85", align 4 + %"101" = load i64, ptr addrspace(5) %"85", align 8 %"152" = inttoptr i64 %"101" to ptr %"55" = getelementptr inbounds i8, ptr %"152", i64 4 %"102" = load float, ptr %"55", align 4 store float %"102", ptr addrspace(5) %"88", align 4 - %"103" = load i64, ptr addrspace(5) %"85", align 4 + %"103" = load i64, ptr addrspace(5) %"85", align 8 %"153" = inttoptr i64 %"103" to ptr %"57" = getelementptr inbounds i8, ptr %"153", i64 8 %"104" = load float, ptr %"57", align 4 store float %"104", ptr addrspace(5) %"89", align 4 - %"105" = load i64, ptr addrspace(5) %"85", align 4 + %"105" = load i64, ptr addrspace(5) %"85", align 8 %"154" = inttoptr i64 %"105" to ptr %"59" = getelementptr inbounds i8, ptr %"154", i64 12 %"106" = load float, ptr %"59", align 4 store float %"106", ptr addrspace(5) %"90", align 4 - %"107" = load i64, ptr addrspace(5) %"85", align 4 + %"107" = load i64, ptr addrspace(5) %"85", align 8 %"155" = inttoptr i64 %"107" to ptr %"61" = getelementptr inbounds i8, ptr %"155", i64 16 %"108" = load float, ptr %"61", align 4 store float %"108", ptr addrspace(5) %"91", align 4 - %"109" = load i64, ptr addrspace(5) %"85", align 4 + %"109" = load i64, ptr addrspace(5) %"85", align 8 %"156" = inttoptr i64 %"109" to ptr %"63" = getelementptr inbounds i8, ptr %"156", i64 20 %"110" = load float, ptr %"63", align 4 store float %"110", ptr addrspace(5) %"92", align 4 - %"111" = load i64, ptr addrspace(5) %"85", align 4 + %"111" = load i64, ptr addrspace(5) %"85", align 8 %"157" = inttoptr i64 %"111" to ptr %"65" = getelementptr inbounds i8, ptr %"157", i64 24 %"112" = load float, ptr %"65", align 4 store float %"112", ptr addrspace(5) %"93", align 4 - %"113" = load i64, ptr addrspace(5) %"85", align 4 + %"113" = load i64, ptr addrspace(5) %"85", align 8 %"158" = inttoptr i64 %"113" to ptr %"67" = getelementptr inbounds i8, ptr %"158", i64 28 %"114" = load float, ptr %"67", align 4 @@ -80,7 +80,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"25" "25": ; preds = %"24", %"23" - %"122" = load i64, ptr addrspace(5) %"86", align 4 + %"122" = load i64, ptr addrspace(5) %"86", align 8 %"123" = load i32, ptr addrspace(5) %"95", align 4 %"159" = inttoptr i64 %"122" to ptr store i32 %"123", ptr %"159", align 4 @@ -104,7 +104,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"29" "29": ; preds = %"28", %"27" - %"131" = load i64, ptr addrspace(5) %"86", align 4 + %"131" = load i64, ptr addrspace(5) %"86", align 8 %"160" = inttoptr i64 %"131" to ptr %"73" = getelementptr inbounds i8, ptr %"160", i64 4 %"132" = load i32, ptr addrspace(5) %"95", align 4 @@ -129,7 +129,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"33" "33": ; preds = %"32", %"31" - %"140" = load i64, ptr addrspace(5) %"86", align 4 + %"140" = load i64, ptr addrspace(5) %"86", align 8 %"161" = inttoptr i64 %"140" to ptr %"77" = getelementptr inbounds i8, ptr %"161", i64 8 %"141" = load i32, ptr addrspace(5) %"95", align 4 @@ -154,7 +154,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs br label %"37" "37": ; preds = %"36", %"35" - %"149" = load i64, ptr addrspace(5) %"86", align 4 + %"149" = load i64, ptr addrspace(5) %"86", align 8 %"162" = inttoptr i64 %"149" to ptr %"81" = getelementptr inbounds i8, ptr %"162", i64 12 %"150" = load i32, ptr addrspace(5) %"95", align 4 diff --git a/ptx/src/test/ll/shared_ptr_32.ll b/ptx/src/test/ll/shared_ptr_32.ll index 5a6f55f..03fbbfd 100644 --- a/ptx/src/test/ll/shared_ptr_32.ll +++ b/ptx/src/test/ll/shared_ptr_32.ll @@ -12,28 +12,28 @@ define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"35", ptr br label %"34" "34": ; preds = %1 - %"42" = load i64, ptr addrspace(4) %"35", align 4 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"43", ptr addrspace(5) %"38", align 4 + %"42" = load i64, ptr addrspace(4) %"35", align 8 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"43" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"43", ptr addrspace(5) %"38", align 8 store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(5) %"37", align 4 + %"46" = load i64, ptr addrspace(5) %"37", align 8 %"54" = inttoptr i64 %"46" to ptr addrspace(1) - %"45" = load i64, ptr addrspace(1) %"54", align 4 - store i64 %"45", ptr addrspace(5) %"40", align 4 + %"45" = load i64, ptr addrspace(1) %"54", align 8 + store i64 %"45", ptr addrspace(5) %"40", align 8 %"47" = load i32, ptr addrspace(5) %"39", align 4 - %"48" = load i64, ptr addrspace(5) %"40", align 4 + %"48" = load i64, ptr addrspace(5) %"40", align 8 %"55" = inttoptr i32 %"47" to ptr addrspace(3) - store i64 %"48", ptr addrspace(3) %"55", align 4 + store i64 %"48", ptr addrspace(3) %"55", align 8 %"49" = load i32, ptr addrspace(5) %"39", align 4 %"56" = inttoptr i32 %"49" to ptr addrspace(3) %"33" = getelementptr inbounds i8, ptr addrspace(3) %"56", i64 0 - %"50" = load i64, ptr addrspace(3) %"33", align 4 - store i64 %"50", ptr addrspace(5) %"41", align 4 - %"51" = load i64, ptr addrspace(5) %"38", align 4 - %"52" = load i64, ptr addrspace(5) %"41", align 4 + %"50" = load i64, ptr addrspace(3) %"33", align 8 + store i64 %"50", ptr addrspace(5) %"41", align 8 + %"51" = load i64, ptr addrspace(5) %"38", align 8 + %"52" = load i64, ptr addrspace(5) %"41", align 8 %"57" = inttoptr i64 %"51" to ptr addrspace(1) - store i64 %"52", ptr addrspace(1) %"57", align 4 + store i64 %"52", ptr addrspace(1) %"57", align 8 ret void } diff --git a/ptx/src/test/ll/shared_ptr_take_address.ll b/ptx/src/test/ll/shared_ptr_take_address.ll index b075ccb..b632d41 100644 --- a/ptx/src/test/ll/shared_ptr_take_address.ll +++ b/ptx/src/test/ll/shared_ptr_take_address.ll @@ -12,27 +12,27 @@ define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) % br label %"32" "32": ; preds = %1 - %"40" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"40", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"41", ptr addrspace(5) %"36", align 4 - store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 + %"40" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"40", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"41", ptr addrspace(5) %"36", align 8 + store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 %"52" = inttoptr i64 %"44" to ptr addrspace(1) - %"43" = load i64, ptr addrspace(1) %"52", align 4 - store i64 %"43", ptr addrspace(5) %"38", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"43" = load i64, ptr addrspace(1) %"52", align 8 + store i64 %"43", ptr addrspace(5) %"38", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %"53" = inttoptr i64 %"45" to ptr addrspace(3) - store i64 %"46", ptr addrspace(3) %"53", align 4 - %"48" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"46", ptr addrspace(3) %"53", align 8 + %"48" = load i64, ptr addrspace(5) %"37", align 8 %"54" = inttoptr i64 %"48" to ptr addrspace(3) - %"47" = load i64, ptr addrspace(3) %"54", align 4 - store i64 %"47", ptr addrspace(5) %"39", align 4 - %"49" = load i64, ptr addrspace(5) %"36", align 4 - %"50" = load i64, ptr addrspace(5) %"39", align 4 + %"47" = load i64, ptr addrspace(3) %"54", align 8 + store i64 %"47", ptr addrspace(5) %"39", align 8 + %"49" = load i64, ptr addrspace(5) %"36", align 8 + %"50" = load i64, ptr addrspace(5) %"39", align 8 %"55" = inttoptr i64 %"49" to ptr addrspace(1) - store i64 %"50", ptr addrspace(1) %"55", align 4 + store i64 %"50", ptr addrspace(1) %"55", align 8 ret void } diff --git a/ptx/src/test/ll/shared_unify_extern.ll b/ptx/src/test/ll/shared_unify_extern.ll index 4020f92..1af840e 100644 --- a/ptx/src/test/ll/shared_unify_extern.ll +++ b/ptx/src/test/ll/shared_unify_extern.ll @@ -11,15 +11,15 @@ define i64 @add() #0 { br label %"41" "41": ; preds = %1 - %"49" = load i64, ptr addrspace(3) @shared_mod, align 4 - store i64 %"49", ptr addrspace(5) %"47", align 4 - %"50" = load i64, ptr addrspace(3) @shared_ex, align 4 - store i64 %"50", ptr addrspace(5) %"48", align 4 - %"52" = load i64, ptr addrspace(5) %"48", align 4 - %"53" = load i64, ptr addrspace(5) %"47", align 4 + %"49" = load i64, ptr addrspace(3) @shared_mod, align 8 + store i64 %"49", ptr addrspace(5) %"47", align 8 + %"50" = load i64, ptr addrspace(3) @shared_ex, align 8 + store i64 %"50", ptr addrspace(5) %"48", align 8 + %"52" = load i64, ptr addrspace(5) %"48", align 8 + %"53" = load i64, ptr addrspace(5) %"47", align 8 %"75" = add i64 %"52", %"53" - store i64 %"75", ptr addrspace(5) %"46", align 4 - %2 = load i64, ptr addrspace(5) %"46", align 4 + store i64 %"75", ptr addrspace(5) %"46", align 8 + %2 = load i64, ptr addrspace(5) %"46", align 8 ret i64 %2 } @@ -31,13 +31,13 @@ define i64 @set_shared_temp1(i64 %"15") #0 { br label %"42" "42": ; preds = %1 - store i64 %"15", ptr addrspace(3) @shared_ex, align 4 + store i64 %"15", ptr addrspace(3) @shared_ex, align 8 %"55" = call i64 @add() - store i64 %"55", ptr addrspace(5) %"54", align 4 + store i64 %"55", ptr addrspace(5) %"54", align 8 br label %"43" "43": ; preds = %"42" - %2 = load i64, ptr addrspace(5) %"54", align 4 + %2 = load i64, ptr addrspace(5) %"54", align 8 ret i64 %2 } @@ -52,31 +52,31 @@ define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"56" br label %"44" "44": ; preds = %1 - %"62" = load i64, ptr addrspace(4) %"56", align 4 - store i64 %"62", ptr addrspace(5) %"58", align 4 - %"63" = load i64, ptr addrspace(4) %"57", align 4 - store i64 %"63", ptr addrspace(5) %"59", align 4 - %"65" = load i64, ptr addrspace(5) %"58", align 4 + %"62" = load i64, ptr addrspace(4) %"56", align 8 + store i64 %"62", ptr addrspace(5) %"58", align 8 + %"63" = load i64, ptr addrspace(4) %"57", align 8 + store i64 %"63", ptr addrspace(5) %"59", align 8 + %"65" = load i64, ptr addrspace(5) %"58", align 8 %"78" = inttoptr i64 %"65" to ptr addrspace(1) - %"64" = load i64, ptr addrspace(1) %"78", align 4 - store i64 %"64", ptr addrspace(5) %"60", align 4 - %"66" = load i64, ptr addrspace(5) %"58", align 4 + %"64" = load i64, ptr addrspace(1) %"78", align 8 + store i64 %"64", ptr addrspace(5) %"60", align 8 + %"66" = load i64, ptr addrspace(5) %"58", align 8 %"79" = inttoptr i64 %"66" to ptr addrspace(1) %"40" = getelementptr inbounds i8, ptr addrspace(1) %"79", i64 8 - %"67" = load i64, ptr addrspace(1) %"40", align 4 - store i64 %"67", ptr addrspace(5) %"61", align 4 - %"68" = load i64, ptr addrspace(5) %"61", align 4 - store i64 %"68", ptr addrspace(3) @shared_mod, align 4 - %"70" = load i64, ptr addrspace(5) %"60", align 4 + %"67" = load i64, ptr addrspace(1) %"40", align 8 + store i64 %"67", ptr addrspace(5) %"61", align 8 + %"68" = load i64, ptr addrspace(5) %"61", align 8 + store i64 %"68", ptr addrspace(3) @shared_mod, align 8 + %"70" = load i64, ptr addrspace(5) %"60", align 8 %"81" = call i64 @set_shared_temp1(i64 %"70") - store i64 %"81", ptr addrspace(5) %"61", align 4 + store i64 %"81", ptr addrspace(5) %"61", align 8 br label %"45" "45": ; preds = %"44" - %"71" = load i64, ptr addrspace(5) %"59", align 4 - %"72" = load i64, ptr addrspace(5) %"61", align 4 + %"71" = load i64, ptr addrspace(5) %"59", align 8 + %"72" = load i64, ptr addrspace(5) %"61", align 8 %"83" = inttoptr i64 %"71" to ptr - store i64 %"72", ptr %"83", align 4 + store i64 %"72", ptr %"83", align 8 ret void } diff --git a/ptx/src/test/ll/shared_unify_local.ll b/ptx/src/test/ll/shared_unify_local.ll index ef4b605..f211134 100644 --- a/ptx/src/test/ll/shared_unify_local.ll +++ b/ptx/src/test/ll/shared_unify_local.ll @@ -10,14 +10,14 @@ define i64 @add(i64 %"10") #0 { br label %"42" "42": ; preds = %1 - store i64 %"10", ptr addrspace(3) @shared_mod, align 4 - %"49" = load i64, ptr addrspace(3) @shared_mod, align 4 - store i64 %"49", ptr addrspace(5) %"48", align 4 - %"101" = load i64, ptr addrspace(3) @shared_ex, align 4 - %"51" = load i64, ptr addrspace(5) %"48", align 4 + store i64 %"10", ptr addrspace(3) @shared_mod, align 8 + %"49" = load i64, ptr addrspace(3) @shared_mod, align 8 + store i64 %"49", ptr addrspace(5) %"48", align 8 + %"101" = load i64, ptr addrspace(3) @shared_ex, align 8 + %"51" = load i64, ptr addrspace(5) %"48", align 8 %"72" = add i64 %"101", %"51" - store i64 %"72", ptr addrspace(5) %"47", align 4 - %2 = load i64, ptr addrspace(5) %"47", align 4 + store i64 %"72", ptr addrspace(5) %"47", align 8 + %2 = load i64, ptr addrspace(5) %"47", align 8 ret i64 %2 } @@ -29,13 +29,13 @@ define i64 @set_shared_temp1(i64 %"15", i64 %"16") #0 { br label %"43" "43": ; preds = %1 - store i64 %"15", ptr addrspace(3) @shared_ex, align 4 + store i64 %"15", ptr addrspace(3) @shared_ex, align 8 %"53" = call i64 @add(i64 %"16") - store i64 %"53", ptr addrspace(5) %"52", align 4 + store i64 %"53", ptr addrspace(5) %"52", align 8 br label %"44" "44": ; preds = %"43" - %2 = load i64, ptr addrspace(5) %"52", align 4 + %2 = load i64, ptr addrspace(5) %"52", align 8 ret i64 %2 } @@ -50,30 +50,30 @@ define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"54", br label %"45" "45": ; preds = %1 - %"60" = load i64, ptr addrspace(4) %"54", align 4 - store i64 %"60", ptr addrspace(5) %"56", align 4 - %"61" = load i64, ptr addrspace(4) %"55", align 4 - store i64 %"61", ptr addrspace(5) %"57", align 4 - %"63" = load i64, ptr addrspace(5) %"56", align 4 + %"60" = load i64, ptr addrspace(4) %"54", align 8 + store i64 %"60", ptr addrspace(5) %"56", align 8 + %"61" = load i64, ptr addrspace(4) %"55", align 8 + store i64 %"61", ptr addrspace(5) %"57", align 8 + %"63" = load i64, ptr addrspace(5) %"56", align 8 %"75" = inttoptr i64 %"63" to ptr addrspace(1) - %"62" = load i64, ptr addrspace(1) %"75", align 4 - store i64 %"62", ptr addrspace(5) %"58", align 4 - %"64" = load i64, ptr addrspace(5) %"56", align 4 + %"62" = load i64, ptr addrspace(1) %"75", align 8 + store i64 %"62", ptr addrspace(5) %"58", align 8 + %"64" = load i64, ptr addrspace(5) %"56", align 8 %"76" = inttoptr i64 %"64" to ptr addrspace(1) %"41" = getelementptr inbounds i8, ptr addrspace(1) %"76", i64 8 - %"65" = load i64, ptr addrspace(1) %"41", align 4 - store i64 %"65", ptr addrspace(5) %"59", align 4 - %"67" = load i64, ptr addrspace(5) %"58", align 4 - %"68" = load i64, ptr addrspace(5) %"59", align 4 + %"65" = load i64, ptr addrspace(1) %"41", align 8 + store i64 %"65", ptr addrspace(5) %"59", align 8 + %"67" = load i64, ptr addrspace(5) %"58", align 8 + %"68" = load i64, ptr addrspace(5) %"59", align 8 %"77" = call i64 @set_shared_temp1(i64 %"67", i64 %"68") - store i64 %"77", ptr addrspace(5) %"59", align 4 + store i64 %"77", ptr addrspace(5) %"59", align 8 br label %"46" "46": ; preds = %"45" - %"69" = load i64, ptr addrspace(5) %"57", align 4 - %"70" = load i64, ptr addrspace(5) %"59", align 4 + %"69" = load i64, ptr addrspace(5) %"57", align 8 + %"70" = load i64, ptr addrspace(5) %"59", align 8 %"79" = inttoptr i64 %"69" to ptr - store i64 %"70", ptr %"79", align 4 + store i64 %"70", ptr %"79", align 8 ret void } diff --git a/ptx/src/test/ll/shared_variable.ll b/ptx/src/test/ll/shared_variable.ll index 821ac7e..4ea2cb1 100644 --- a/ptx/src/test/ll/shared_variable.ll +++ b/ptx/src/test/ll/shared_variable.ll @@ -11,22 +11,22 @@ define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"32", pt br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr addrspace(1) - %"40" = load i64, ptr addrspace(1) %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"36", align 4 - store i64 %"42", ptr addrspace(3) @shared_mem1, align 4 - %"43" = load i64, ptr addrspace(3) @shared_mem1, align 4 - store i64 %"43", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + %"40" = load i64, ptr addrspace(1) %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"36", align 8 + store i64 %"42", ptr addrspace(3) @shared_mem1, align 8 + %"43" = load i64, ptr addrspace(3) @shared_mem1, align 8 + store i64 %"43", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"49" = inttoptr i64 %"44" to ptr addrspace(1) - store i64 %"45", ptr addrspace(1) %"49", align 4 + store i64 %"45", ptr addrspace(1) %"49", align 8 ret void } diff --git a/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll b/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll index da51305..45d21e4 100644 --- a/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_bfly_b32_pred.ll @@ -14,8 +14,8 @@ define amdgpu_kernel void @shfl_sync_bfly_b32_pred(ptr addrspace(4) byref(i64) % br label %"39" "39": ; preds = %1 - %"48" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"48", ptr addrspace(5) %"43", align 4 + %"48" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"48", ptr addrspace(5) %"43", align 8 %"33" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) br label %"40" @@ -40,15 +40,15 @@ define amdgpu_kernel void @shfl_sync_bfly_b32_pred(ptr addrspace(4) byref(i64) % "15": ; preds = %"14", %"40" %"57" = load i32, ptr addrspace(5) %"45", align 4 %"56" = zext i32 %"57" to i64 - store i64 %"56", ptr addrspace(5) %"44", align 4 - %"59" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"56", ptr addrspace(5) %"44", align 8 + %"59" = load i64, ptr addrspace(5) %"44", align 8 %"58" = mul i64 %"59", 4 - store i64 %"58", ptr addrspace(5) %"44", align 4 - %"61" = load i64, ptr addrspace(5) %"43", align 4 - %"62" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"58", ptr addrspace(5) %"44", align 8 + %"61" = load i64, ptr addrspace(5) %"43", align 8 + %"62" = load i64, ptr addrspace(5) %"44", align 8 %"60" = add i64 %"61", %"62" - store i64 %"60", ptr addrspace(5) %"43", align 4 - %"63" = load i64, ptr addrspace(5) %"43", align 4 + store i64 %"60", ptr addrspace(5) %"43", align 8 + %"63" = load i64, ptr addrspace(5) %"43", align 8 %"64" = load i32, ptr addrspace(5) %"46", align 4 %"67" = inttoptr i64 %"63" to ptr store i32 %"64", ptr %"67", align 4 diff --git a/ptx/src/test/ll/shfl_sync_down_b32_pred.ll b/ptx/src/test/ll/shfl_sync_down_b32_pred.ll index 2f9edef..2e1ce34 100644 --- a/ptx/src/test/ll/shfl_sync_down_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_down_b32_pred.ll @@ -14,8 +14,8 @@ define amdgpu_kernel void @shfl_sync_down_b32_pred(ptr addrspace(4) byref(i64) % br label %"39" "39": ; preds = %1 - %"48" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"48", ptr addrspace(5) %"43", align 4 + %"48" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"48", ptr addrspace(5) %"43", align 8 %"33" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) br label %"40" @@ -40,15 +40,15 @@ define amdgpu_kernel void @shfl_sync_down_b32_pred(ptr addrspace(4) byref(i64) % "15": ; preds = %"14", %"40" %"57" = load i32, ptr addrspace(5) %"45", align 4 %"56" = zext i32 %"57" to i64 - store i64 %"56", ptr addrspace(5) %"44", align 4 - %"59" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"56", ptr addrspace(5) %"44", align 8 + %"59" = load i64, ptr addrspace(5) %"44", align 8 %"58" = mul i64 %"59", 4 - store i64 %"58", ptr addrspace(5) %"44", align 4 - %"61" = load i64, ptr addrspace(5) %"43", align 4 - %"62" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"58", ptr addrspace(5) %"44", align 8 + %"61" = load i64, ptr addrspace(5) %"43", align 8 + %"62" = load i64, ptr addrspace(5) %"44", align 8 %"60" = add i64 %"61", %"62" - store i64 %"60", ptr addrspace(5) %"43", align 4 - %"63" = load i64, ptr addrspace(5) %"43", align 4 + store i64 %"60", ptr addrspace(5) %"43", align 8 + %"63" = load i64, ptr addrspace(5) %"43", align 8 %"64" = load i32, ptr addrspace(5) %"46", align 4 %"67" = inttoptr i64 %"63" to ptr store i32 %"64", ptr %"67", align 4 diff --git a/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll b/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll index e7ac9c6..6c58633 100644 --- a/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_idx_b32_pred.ll @@ -14,8 +14,8 @@ define amdgpu_kernel void @shfl_sync_idx_b32_pred(ptr addrspace(4) byref(i64) %" br label %"39" "39": ; preds = %1 - %"48" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"48", ptr addrspace(5) %"43", align 4 + %"48" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"48", ptr addrspace(5) %"43", align 8 %"33" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) br label %"40" @@ -40,15 +40,15 @@ define amdgpu_kernel void @shfl_sync_idx_b32_pred(ptr addrspace(4) byref(i64) %" "15": ; preds = %"14", %"40" %"57" = load i32, ptr addrspace(5) %"45", align 4 %"56" = zext i32 %"57" to i64 - store i64 %"56", ptr addrspace(5) %"44", align 4 - %"59" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"56", ptr addrspace(5) %"44", align 8 + %"59" = load i64, ptr addrspace(5) %"44", align 8 %"58" = mul i64 %"59", 4 - store i64 %"58", ptr addrspace(5) %"44", align 4 - %"61" = load i64, ptr addrspace(5) %"43", align 4 - %"62" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"58", ptr addrspace(5) %"44", align 8 + %"61" = load i64, ptr addrspace(5) %"43", align 8 + %"62" = load i64, ptr addrspace(5) %"44", align 8 %"60" = add i64 %"61", %"62" - store i64 %"60", ptr addrspace(5) %"43", align 4 - %"63" = load i64, ptr addrspace(5) %"43", align 4 + store i64 %"60", ptr addrspace(5) %"43", align 8 + %"63" = load i64, ptr addrspace(5) %"43", align 8 %"64" = load i32, ptr addrspace(5) %"46", align 4 %"67" = inttoptr i64 %"63" to ptr store i32 %"64", ptr %"67", align 4 diff --git a/ptx/src/test/ll/shfl_sync_mode_b32.ll b/ptx/src/test/ll/shfl_sync_mode_b32.ll index a65ad1e..913c4aa 100644 --- a/ptx/src/test/ll/shfl_sync_mode_b32.ll +++ b/ptx/src/test/ll/shfl_sync_mode_b32.ll @@ -20,8 +20,8 @@ define amdgpu_kernel void @shfl_sync_mode_b32(ptr addrspace(4) byref(i64) %"48") br label %"45" "45": ; preds = %1 - %"54" = load i64, ptr addrspace(4) %"48", align 4 - store i64 %"54", ptr addrspace(5) %"49", align 4 + %"54" = load i64, ptr addrspace(4) %"48", align 8 + store i64 %"54", ptr addrspace(5) %"49", align 8 %"31" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) br label %"46" @@ -55,15 +55,15 @@ define amdgpu_kernel void @shfl_sync_mode_b32(ptr addrspace(4) byref(i64) %"48") store i32 %"72", ptr addrspace(5) %"53", align 4 %"76" = load i32, ptr addrspace(5) %"51", align 4 %"75" = zext i32 %"76" to i64 - store i64 %"75", ptr addrspace(5) %"50", align 4 - %"78" = load i64, ptr addrspace(5) %"50", align 4 + store i64 %"75", ptr addrspace(5) %"50", align 8 + %"78" = load i64, ptr addrspace(5) %"50", align 8 %"77" = mul i64 %"78", 4 - store i64 %"77", ptr addrspace(5) %"50", align 4 - %"80" = load i64, ptr addrspace(5) %"49", align 4 - %"81" = load i64, ptr addrspace(5) %"50", align 4 + store i64 %"77", ptr addrspace(5) %"50", align 8 + %"80" = load i64, ptr addrspace(5) %"49", align 8 + %"81" = load i64, ptr addrspace(5) %"50", align 8 %"79" = add i64 %"80", %"81" - store i64 %"79", ptr addrspace(5) %"49", align 4 - %"82" = load i64, ptr addrspace(5) %"49", align 4 + store i64 %"79", ptr addrspace(5) %"49", align 8 + %"82" = load i64, ptr addrspace(5) %"49", align 8 %"83" = load i32, ptr addrspace(5) %"53", align 4 %"92" = inttoptr i64 %"82" to ptr store i32 %"83", ptr %"92", align 4 diff --git a/ptx/src/test/ll/shfl_sync_up_b32_pred.ll b/ptx/src/test/ll/shfl_sync_up_b32_pred.ll index 399b03a..a75f4bf 100644 --- a/ptx/src/test/ll/shfl_sync_up_b32_pred.ll +++ b/ptx/src/test/ll/shfl_sync_up_b32_pred.ll @@ -14,8 +14,8 @@ define amdgpu_kernel void @shfl_sync_up_b32_pred(ptr addrspace(4) byref(i64) %"4 br label %"39" "39": ; preds = %1 - %"48" = load i64, ptr addrspace(4) %"42", align 4 - store i64 %"48", ptr addrspace(5) %"43", align 4 + %"48" = load i64, ptr addrspace(4) %"42", align 8 + store i64 %"48", ptr addrspace(5) %"43", align 8 %"33" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) br label %"40" @@ -40,15 +40,15 @@ define amdgpu_kernel void @shfl_sync_up_b32_pred(ptr addrspace(4) byref(i64) %"4 "15": ; preds = %"14", %"40" %"57" = load i32, ptr addrspace(5) %"45", align 4 %"56" = zext i32 %"57" to i64 - store i64 %"56", ptr addrspace(5) %"44", align 4 - %"59" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"56", ptr addrspace(5) %"44", align 8 + %"59" = load i64, ptr addrspace(5) %"44", align 8 %"58" = mul i64 %"59", 4 - store i64 %"58", ptr addrspace(5) %"44", align 4 - %"61" = load i64, ptr addrspace(5) %"43", align 4 - %"62" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"58", ptr addrspace(5) %"44", align 8 + %"61" = load i64, ptr addrspace(5) %"43", align 8 + %"62" = load i64, ptr addrspace(5) %"44", align 8 %"60" = add i64 %"61", %"62" - store i64 %"60", ptr addrspace(5) %"43", align 4 - %"63" = load i64, ptr addrspace(5) %"43", align 4 + store i64 %"60", ptr addrspace(5) %"43", align 8 + %"63" = load i64, ptr addrspace(5) %"43", align 8 %"64" = load i32, ptr addrspace(5) %"46", align 4 %"67" = inttoptr i64 %"63" to ptr store i32 %"64", ptr %"67", align 4 diff --git a/ptx/src/test/ll/shl.ll b/ptx/src/test/ll/shl.ll index d1e8022..6c9b505 100644 --- a/ptx/src/test/ll/shl.ll +++ b/ptx/src/test/ll/shl.ll @@ -9,22 +9,22 @@ define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"32", ptr addrspace( br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr - %"40" = load i64, ptr %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %2 = shl i64 %"43", 2 %"47" = select i1 false, i64 0, i64 %2 - store i64 %"47", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"47", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"49" = inttoptr i64 %"44" to ptr - store i64 %"45", ptr %"49", align 4 + store i64 %"45", ptr %"49", align 8 ret void } diff --git a/ptx/src/test/ll/shr.ll b/ptx/src/test/ll/shr.ll index bbb8f9c..bc0acae 100644 --- a/ptx/src/test/ll/shr.ll +++ b/ptx/src/test/ll/shr.ll @@ -8,11 +8,11 @@ define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"31", ptr addrspace( br label %"30" "30": ; preds = %1 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"37" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"37" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"37", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"44" = inttoptr i64 %"39" to ptr %"38" = load i32, ptr %"44", align 4 store i32 %"38", ptr addrspace(5) %"35", align 4 @@ -20,7 +20,7 @@ define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"31", ptr addrspace( %2 = ashr i32 %"41", 1 %"40" = select i1 false, i32 0, i32 %2 store i32 %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %"43" = load i32, ptr addrspace(5) %"35", align 4 %"45" = inttoptr i64 %"42" to ptr store i32 %"43", ptr %"45", align 4 diff --git a/ptx/src/test/ll/sign_extend.ll b/ptx/src/test/ll/sign_extend.ll index 1d8ed20..ee7d5c3 100644 --- a/ptx/src/test/ll/sign_extend.ll +++ b/ptx/src/test/ll/sign_extend.ll @@ -8,16 +8,16 @@ define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"30", ptr ad br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"42" = inttoptr i64 %"38" to ptr %"41" = load i16, ptr %"42", align 2 %"37" = sext i16 %"41" to i32 store i32 %"37", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(5) %"33", align 4 + %"39" = load i64, ptr addrspace(5) %"33", align 8 %"40" = load i32, ptr addrspace(5) %"34", align 4 %"43" = inttoptr i64 %"39" to ptr store i32 %"40", ptr %"43", align 4 diff --git a/ptx/src/test/ll/sin.ll b/ptx/src/test/ll/sin.ll index 922256b..30441ab 100644 --- a/ptx/src/test/ll/sin.ll +++ b/ptx/src/test/ll/sin.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"30", ptr addrspace( br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load float, ptr %"43", align 4 store float %"37", ptr addrspace(5) %"34", align 4 %"40" = load float, ptr addrspace(5) %"34", align 4 %"39" = call afn float @llvm.sin.f32(float %"40") store float %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load float, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store float %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/sqrt.ll b/ptx/src/test/ll/sqrt.ll index 2497375..4c7ce98 100644 --- a/ptx/src/test/ll/sqrt.ll +++ b/ptx/src/test/ll/sqrt.ll @@ -8,18 +8,18 @@ define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"30", ptr addrspace br label %"29" "29": ; preds = %1 - %"35" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"35", ptr addrspace(5) %"32", align 4 - %"36" = load i64, ptr addrspace(4) %"31", align 4 - store i64 %"36", ptr addrspace(5) %"33", align 4 - %"38" = load i64, ptr addrspace(5) %"32", align 4 + %"35" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"35", ptr addrspace(5) %"32", align 8 + %"36" = load i64, ptr addrspace(4) %"31", align 8 + store i64 %"36", ptr addrspace(5) %"33", align 8 + %"38" = load i64, ptr addrspace(5) %"32", align 8 %"43" = inttoptr i64 %"38" to ptr %"37" = load float, ptr %"43", align 4 store float %"37", ptr addrspace(5) %"34", align 4 %"40" = load float, ptr addrspace(5) %"34", align 4 %"39" = call float @llvm.amdgcn.sqrt.f32(float %"40") store float %"39", ptr addrspace(5) %"34", align 4 - %"41" = load i64, ptr addrspace(5) %"33", align 4 + %"41" = load i64, ptr addrspace(5) %"33", align 8 %"42" = load float, ptr addrspace(5) %"34", align 4 %"44" = inttoptr i64 %"41" to ptr store float %"42", ptr %"44", align 4 diff --git a/ptx/src/test/ll/stateful_ld_st_ntid.ll b/ptx/src/test/ll/stateful_ld_st_ntid.ll index c100da6..1f9d754 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid.ll @@ -12,15 +12,15 @@ define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"36" br label %"33" "33": ; preds = %1 - %"62" = load i64, ptr addrspace(4) %"36", align 4 - store i64 %"62", ptr addrspace(5) %"38", align 4 - %"63" = load i64, ptr addrspace(4) %"37", align 4 - store i64 %"63", ptr addrspace(5) %"39", align 4 - %"46" = load i64, ptr addrspace(5) %"38", align 4 + %"62" = load i64, ptr addrspace(4) %"36", align 8 + store i64 %"62", ptr addrspace(5) %"38", align 8 + %"63" = load i64, ptr addrspace(4) %"37", align 8 + store i64 %"63", ptr addrspace(5) %"39", align 8 + %"46" = load i64, ptr addrspace(5) %"38", align 8 %2 = inttoptr i64 %"46" to ptr %"45" = addrspacecast ptr %2 to ptr addrspace(1) store ptr addrspace(1) %"45", ptr addrspace(5) %"38", align 8 - %"48" = load i64, ptr addrspace(5) %"39", align 4 + %"48" = load i64, ptr addrspace(5) %"39", align 8 %3 = inttoptr i64 %"48" to ptr %"47" = addrspacecast ptr %3 to ptr addrspace(1) store ptr addrspace(1) %"47", ptr addrspace(5) %"39", align 8 @@ -31,23 +31,23 @@ define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"36" store i32 %"32", ptr addrspace(5) %"40", align 4 %"51" = load i32, ptr addrspace(5) %"40", align 4 %"50" = zext i32 %"51" to i64 - store i64 %"50", ptr addrspace(5) %"41", align 4 - %"53" = load i64, ptr addrspace(5) %"38", align 4 - %"54" = load i64, ptr addrspace(5) %"41", align 4 + store i64 %"50", ptr addrspace(5) %"41", align 8 + %"53" = load i64, ptr addrspace(5) %"38", align 8 + %"54" = load i64, ptr addrspace(5) %"41", align 8 %"64" = add i64 %"53", %"54" - store i64 %"64", ptr addrspace(5) %"38", align 4 - %"56" = load i64, ptr addrspace(5) %"39", align 4 - %"57" = load i64, ptr addrspace(5) %"41", align 4 + store i64 %"64", ptr addrspace(5) %"38", align 8 + %"56" = load i64, ptr addrspace(5) %"39", align 8 + %"57" = load i64, ptr addrspace(5) %"41", align 8 %"66" = add i64 %"56", %"57" - store i64 %"66", ptr addrspace(5) %"39", align 4 - %"59" = load i64, ptr addrspace(5) %"38", align 4 + store i64 %"66", ptr addrspace(5) %"39", align 8 + %"59" = load i64, ptr addrspace(5) %"38", align 8 %"68" = inttoptr i64 %"59" to ptr addrspace(1) - %"58" = load i64, ptr addrspace(1) %"68", align 4 - store i64 %"58", ptr addrspace(5) %"42", align 4 - %"60" = load i64, ptr addrspace(5) %"39", align 4 - %"61" = load i64, ptr addrspace(5) %"42", align 4 + %"58" = load i64, ptr addrspace(1) %"68", align 8 + store i64 %"58", ptr addrspace(5) %"42", align 8 + %"60" = load i64, ptr addrspace(5) %"39", align 8 + %"61" = load i64, ptr addrspace(5) %"42", align 8 %"69" = inttoptr i64 %"60" to ptr addrspace(1) - store i64 %"61", ptr addrspace(1) %"69", align 4 + store i64 %"61", ptr addrspace(1) %"69", align 8 ret void } diff --git a/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll b/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll index c1a59c6..eb5c4c1 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid_chain.ll @@ -16,15 +16,15 @@ define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) br label %"37" "37": ; preds = %1 - %"70" = load i64, ptr addrspace(4) %"40", align 4 - store i64 %"70", ptr addrspace(5) %"42", align 4 - %"71" = load i64, ptr addrspace(4) %"41", align 4 - store i64 %"71", ptr addrspace(5) %"45", align 4 - %"54" = load i64, ptr addrspace(5) %"42", align 4 + %"70" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"70", ptr addrspace(5) %"42", align 8 + %"71" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"71", ptr addrspace(5) %"45", align 8 + %"54" = load i64, ptr addrspace(5) %"42", align 8 %2 = inttoptr i64 %"54" to ptr %"53" = addrspacecast ptr %2 to ptr addrspace(1) store ptr addrspace(1) %"53", ptr addrspace(5) %"43", align 8 - %"56" = load i64, ptr addrspace(5) %"45", align 4 + %"56" = load i64, ptr addrspace(5) %"45", align 8 %3 = inttoptr i64 %"56" to ptr %"55" = addrspacecast ptr %3 to ptr addrspace(1) store ptr addrspace(1) %"55", ptr addrspace(5) %"46", align 8 @@ -35,23 +35,23 @@ define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) store i32 %"36", ptr addrspace(5) %"48", align 4 %"59" = load i32, ptr addrspace(5) %"48", align 4 %"58" = zext i32 %"59" to i64 - store i64 %"58", ptr addrspace(5) %"49", align 4 - %"61" = load i64, ptr addrspace(5) %"43", align 4 - %"62" = load i64, ptr addrspace(5) %"49", align 4 + store i64 %"58", ptr addrspace(5) %"49", align 8 + %"61" = load i64, ptr addrspace(5) %"43", align 8 + %"62" = load i64, ptr addrspace(5) %"49", align 8 %"72" = add i64 %"61", %"62" - store i64 %"72", ptr addrspace(5) %"44", align 4 - %"64" = load i64, ptr addrspace(5) %"46", align 4 - %"65" = load i64, ptr addrspace(5) %"49", align 4 + store i64 %"72", ptr addrspace(5) %"44", align 8 + %"64" = load i64, ptr addrspace(5) %"46", align 8 + %"65" = load i64, ptr addrspace(5) %"49", align 8 %"74" = add i64 %"64", %"65" - store i64 %"74", ptr addrspace(5) %"47", align 4 - %"67" = load i64, ptr addrspace(5) %"44", align 4 + store i64 %"74", ptr addrspace(5) %"47", align 8 + %"67" = load i64, ptr addrspace(5) %"44", align 8 %"76" = inttoptr i64 %"67" to ptr addrspace(1) - %"66" = load i64, ptr addrspace(1) %"76", align 4 - store i64 %"66", ptr addrspace(5) %"50", align 4 - %"68" = load i64, ptr addrspace(5) %"47", align 4 - %"69" = load i64, ptr addrspace(5) %"50", align 4 + %"66" = load i64, ptr addrspace(1) %"76", align 8 + store i64 %"66", ptr addrspace(5) %"50", align 8 + %"68" = load i64, ptr addrspace(5) %"47", align 8 + %"69" = load i64, ptr addrspace(5) %"50", align 8 %"77" = inttoptr i64 %"68" to ptr addrspace(1) - store i64 %"69", ptr addrspace(1) %"77", align 4 + store i64 %"69", ptr addrspace(1) %"77", align 8 ret void } diff --git a/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll b/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll index dd54c84..e85ad52 100644 --- a/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll +++ b/ptx/src/test/ll/stateful_ld_st_ntid_sub.ll @@ -16,15 +16,15 @@ define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) % br label %"41" "41": ; preds = %1 - %"74" = load i64, ptr addrspace(4) %"44", align 4 - store i64 %"74", ptr addrspace(5) %"46", align 4 - %"75" = load i64, ptr addrspace(4) %"45", align 4 - store i64 %"75", ptr addrspace(5) %"49", align 4 - %"58" = load i64, ptr addrspace(5) %"46", align 4 + %"74" = load i64, ptr addrspace(4) %"44", align 8 + store i64 %"74", ptr addrspace(5) %"46", align 8 + %"75" = load i64, ptr addrspace(4) %"45", align 8 + store i64 %"75", ptr addrspace(5) %"49", align 8 + %"58" = load i64, ptr addrspace(5) %"46", align 8 %2 = inttoptr i64 %"58" to ptr %"57" = addrspacecast ptr %2 to ptr addrspace(1) store ptr addrspace(1) %"57", ptr addrspace(5) %"47", align 8 - %"60" = load i64, ptr addrspace(5) %"49", align 4 + %"60" = load i64, ptr addrspace(5) %"49", align 8 %3 = inttoptr i64 %"60" to ptr %"59" = addrspacecast ptr %3 to ptr addrspace(1) store ptr addrspace(1) %"59", ptr addrspace(5) %"50", align 8 @@ -35,25 +35,25 @@ define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) % store i32 %"36", ptr addrspace(5) %"52", align 4 %"63" = load i32, ptr addrspace(5) %"52", align 4 %"62" = zext i32 %"63" to i64 - store i64 %"62", ptr addrspace(5) %"53", align 4 - %"65" = load i64, ptr addrspace(5) %"47", align 4 - %"66" = load i64, ptr addrspace(5) %"53", align 4 + store i64 %"62", ptr addrspace(5) %"53", align 8 + %"65" = load i64, ptr addrspace(5) %"47", align 8 + %"66" = load i64, ptr addrspace(5) %"53", align 8 %"76" = sub i64 %"65", %"66" - store i64 %"76", ptr addrspace(5) %"48", align 4 - %"68" = load i64, ptr addrspace(5) %"50", align 4 - %"69" = load i64, ptr addrspace(5) %"53", align 4 + store i64 %"76", ptr addrspace(5) %"48", align 8 + %"68" = load i64, ptr addrspace(5) %"50", align 8 + %"69" = load i64, ptr addrspace(5) %"53", align 8 %"79" = sub i64 %"68", %"69" - store i64 %"79", ptr addrspace(5) %"51", align 4 - %"70" = load i64, ptr addrspace(5) %"48", align 4 + store i64 %"79", ptr addrspace(5) %"51", align 8 + %"70" = load i64, ptr addrspace(5) %"48", align 8 %"82" = inttoptr i64 %"70" to ptr addrspace(1) %"38" = getelementptr inbounds i8, ptr addrspace(1) %"82", i64 0 - %"71" = load i64, ptr addrspace(1) %"38", align 4 - store i64 %"71", ptr addrspace(5) %"54", align 4 - %"72" = load i64, ptr addrspace(5) %"51", align 4 + %"71" = load i64, ptr addrspace(1) %"38", align 8 + store i64 %"71", ptr addrspace(5) %"54", align 8 + %"72" = load i64, ptr addrspace(5) %"51", align 8 %"83" = inttoptr i64 %"72" to ptr addrspace(1) %"40" = getelementptr inbounds i8, ptr addrspace(1) %"83", i64 0 - %"73" = load i64, ptr addrspace(5) %"54", align 4 - store i64 %"73", ptr addrspace(1) %"40", align 4 + %"73" = load i64, ptr addrspace(5) %"54", align 8 + store i64 %"73", ptr addrspace(1) %"40", align 8 ret void } diff --git a/ptx/src/test/ll/stateful_ld_st_simple.ll b/ptx/src/test/ll/stateful_ld_st_simple.ll index f945ee2..07a1621 100644 --- a/ptx/src/test/ll/stateful_ld_st_simple.ll +++ b/ptx/src/test/ll/stateful_ld_st_simple.ll @@ -10,26 +10,26 @@ define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"3 br label %"31" "31": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"39", ptr addrspace(5) %"34", align 4 - %"40" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"40", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(5) %"34", align 4 + %"39" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"39", ptr addrspace(5) %"34", align 8 + %"40" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"40", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(5) %"34", align 8 %2 = inttoptr i64 %"42" to ptr %"49" = addrspacecast ptr %2 to ptr addrspace(1) store ptr addrspace(1) %"49", ptr addrspace(5) %"36", align 8 - %"44" = load i64, ptr addrspace(5) %"35", align 4 + %"44" = load i64, ptr addrspace(5) %"35", align 8 %3 = inttoptr i64 %"44" to ptr %"51" = addrspacecast ptr %3 to ptr addrspace(1) store ptr addrspace(1) %"51", ptr addrspace(5) %"37", align 8 - %"46" = load i64, ptr addrspace(5) %"36", align 4 + %"46" = load i64, ptr addrspace(5) %"36", align 8 %"53" = inttoptr i64 %"46" to ptr addrspace(1) - %"45" = load i64, ptr addrspace(1) %"53", align 4 - store i64 %"45", ptr addrspace(5) %"38", align 4 - %"47" = load i64, ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"38", align 4 + %"45" = load i64, ptr addrspace(1) %"53", align 8 + store i64 %"45", ptr addrspace(5) %"38", align 8 + %"47" = load i64, ptr addrspace(5) %"37", align 8 + %"48" = load i64, ptr addrspace(5) %"38", align 8 %"54" = inttoptr i64 %"47" to ptr addrspace(1) - store i64 %"48", ptr addrspace(1) %"54", align 4 + store i64 %"48", ptr addrspace(1) %"54", align 8 ret void } diff --git a/ptx/src/test/ll/stateful_neg_offset.ll b/ptx/src/test/ll/stateful_neg_offset.ll index d51943d..fe7d09f 100644 --- a/ptx/src/test/ll/stateful_neg_offset.ll +++ b/ptx/src/test/ll/stateful_neg_offset.ll @@ -11,34 +11,34 @@ define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"33" br label %"32" "32": ; preds = %1 - %"41" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"41", ptr addrspace(5) %"35", align 4 - %"42" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"42", ptr addrspace(5) %"36", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 + %"41" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"41", ptr addrspace(5) %"35", align 8 + %"42" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"42", ptr addrspace(5) %"36", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 %2 = inttoptr i64 %"44" to ptr %"57" = addrspacecast ptr %2 to ptr addrspace(1) store ptr addrspace(1) %"57", ptr addrspace(5) %"37", align 8 - %"46" = load i64, ptr addrspace(5) %"36", align 4 + %"46" = load i64, ptr addrspace(5) %"36", align 8 %3 = inttoptr i64 %"46" to ptr %"59" = addrspacecast ptr %3 to ptr addrspace(1) store ptr addrspace(1) %"59", ptr addrspace(5) %"38", align 8 - %"48" = load i64, ptr addrspace(5) %"37", align 4 - %"49" = load i64, ptr addrspace(5) %"38", align 4 + %"48" = load i64, ptr addrspace(5) %"37", align 8 + %"49" = load i64, ptr addrspace(5) %"38", align 8 %"47" = add i64 %"48", %"49" - store i64 %"47", ptr addrspace(5) %"39", align 4 - %"51" = load i64, ptr addrspace(5) %"37", align 4 - %"52" = load i64, ptr addrspace(5) %"38", align 4 + store i64 %"47", ptr addrspace(5) %"39", align 8 + %"51" = load i64, ptr addrspace(5) %"37", align 8 + %"52" = load i64, ptr addrspace(5) %"38", align 8 %"50" = sub i64 %"51", %"52" - store i64 %"50", ptr addrspace(5) %"39", align 4 - %"54" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"50", ptr addrspace(5) %"39", align 8 + %"54" = load i64, ptr addrspace(5) %"37", align 8 %"61" = inttoptr i64 %"54" to ptr addrspace(1) - %"53" = load i64, ptr addrspace(1) %"61", align 4 - store i64 %"53", ptr addrspace(5) %"40", align 4 - %"55" = load i64, ptr addrspace(5) %"38", align 4 - %"56" = load i64, ptr addrspace(5) %"40", align 4 + %"53" = load i64, ptr addrspace(1) %"61", align 8 + store i64 %"53", ptr addrspace(5) %"40", align 8 + %"55" = load i64, ptr addrspace(5) %"38", align 8 + %"56" = load i64, ptr addrspace(5) %"40", align 8 %"62" = inttoptr i64 %"55" to ptr addrspace(1) - store i64 %"56", ptr addrspace(1) %"62", align 4 + store i64 %"56", ptr addrspace(1) %"62", align 8 ret void } diff --git a/ptx/src/test/ll/sub.ll b/ptx/src/test/ll/sub.ll index eafd223..334d23e 100644 --- a/ptx/src/test/ll/sub.ll +++ b/ptx/src/test/ll/sub.ll @@ -9,21 +9,21 @@ define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"32", ptr addrspace( br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr - %"40" = load i64, ptr %"46", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"43" = load i64, ptr addrspace(5) %"36", align 4 + %"40" = load i64, ptr %"46", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"43" = load i64, ptr addrspace(5) %"36", align 8 %"42" = sub i64 %"43", 1 - store i64 %"42", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 - %"45" = load i64, ptr addrspace(5) %"37", align 4 + store i64 %"42", ptr addrspace(5) %"37", align 8 + %"44" = load i64, ptr addrspace(5) %"35", align 8 + %"45" = load i64, ptr addrspace(5) %"37", align 8 %"47" = inttoptr i64 %"44" to ptr - store i64 %"45", ptr %"47", align 4 + store i64 %"45", ptr %"47", align 8 ret void } diff --git a/ptx/src/test/ll/tid.ll b/ptx/src/test/ll/tid.ll index b15f372..ece8736 100644 --- a/ptx/src/test/ll/tid.ll +++ b/ptx/src/test/ll/tid.ll @@ -18,17 +18,17 @@ define amdgpu_kernel void @tid(ptr addrspace(4) byref(i64) %"34") #1 { store i32 %"30", ptr addrspace(5) %"36", align 4 %"41" = load i32, ptr addrspace(5) %"36", align 4 %"40" = zext i32 %"41" to i64 - store i64 %"40", ptr addrspace(5) %"37", align 4 + store i64 %"40", ptr addrspace(5) %"37", align 8 %"43" = load i32, ptr addrspace(5) %"36", align 4 %"42" = trunc i32 %"43" to i8 store i8 %"42", ptr addrspace(5) %"38", align 1 - %"44" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"44", ptr addrspace(5) %"35", align 4 - %"46" = load i64, ptr addrspace(5) %"35", align 4 - %"47" = load i64, ptr addrspace(5) %"37", align 4 + %"44" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"44", ptr addrspace(5) %"35", align 8 + %"46" = load i64, ptr addrspace(5) %"35", align 8 + %"47" = load i64, ptr addrspace(5) %"37", align 8 %"45" = add i64 %"46", %"47" - store i64 %"45", ptr addrspace(5) %"35", align 4 - %"48" = load i64, ptr addrspace(5) %"35", align 4 + store i64 %"45", ptr addrspace(5) %"35", align 8 + %"48" = load i64, ptr addrspace(5) %"35", align 8 %"49" = load i8, ptr addrspace(5) %"38", align 1 %"50" = inttoptr i64 %"48" to ptr store i8 %"49", ptr %"50", align 1 diff --git a/ptx/src/test/ll/vector.ll b/ptx/src/test/ll/vector.ll index 95cb569..e2b5a0c 100644 --- a/ptx/src/test/ll/vector.ll +++ b/ptx/src/test/ll/vector.ll @@ -49,11 +49,11 @@ define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"67", ptr addrspa br label %"45" "45": ; preds = %1 - %"75" = load i64, ptr addrspace(4) %"67", align 4 - store i64 %"75", ptr addrspace(5) %"69", align 4 - %"76" = load i64, ptr addrspace(4) %"68", align 4 - store i64 %"76", ptr addrspace(5) %"70", align 4 - %"78" = load i64, ptr addrspace(5) %"69", align 4 + %"75" = load i64, ptr addrspace(4) %"67", align 8 + store i64 %"75", ptr addrspace(5) %"69", align 8 + %"76" = load i64, ptr addrspace(4) %"68", align 8 + store i64 %"76", ptr addrspace(5) %"70", align 8 + %"78" = load i64, ptr addrspace(5) %"69", align 8 %"85" = inttoptr i64 %"78" to ptr %"77" = load <2 x i32>, ptr %"85", align 8 store <2 x i32> %"77", ptr addrspace(5) %"71", align 8 @@ -65,8 +65,8 @@ define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"67", ptr addrspa "46": ; preds = %"45" %"82" = load <2 x i32>, ptr addrspace(5) %"71", align 8 %"86" = bitcast <2 x i32> %"82" to i64 - store i64 %"86", ptr addrspace(5) %"74", align 4 - %"83" = load i64, ptr addrspace(5) %"70", align 4 + store i64 %"86", ptr addrspace(5) %"74", align 8 + %"83" = load i64, ptr addrspace(5) %"70", align 8 %"84" = load <2 x i32>, ptr addrspace(5) %"71", align 8 %"87" = inttoptr i64 %"83" to ptr store <2 x i32> %"84", ptr %"87", align 8 diff --git a/ptx/src/test/ll/vector4.ll b/ptx/src/test/ll/vector4.ll index cf32621..617a574 100644 --- a/ptx/src/test/ll/vector4.ll +++ b/ptx/src/test/ll/vector4.ll @@ -9,18 +9,18 @@ define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"32", ptr addrsp br label %"31" "31": ; preds = %1 - %"38" = load i64, ptr addrspace(4) %"32", align 4 - store i64 %"38", ptr addrspace(5) %"34", align 4 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"41" = load i64, ptr addrspace(5) %"34", align 4 + %"38" = load i64, ptr addrspace(4) %"32", align 8 + store i64 %"38", ptr addrspace(5) %"34", align 8 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"41" = load i64, ptr addrspace(5) %"34", align 8 %"46" = inttoptr i64 %"41" to ptr %"40" = load <4 x i32>, ptr %"46", align 16 store <4 x i32> %"40", ptr addrspace(5) %"36", align 16 %"42" = load <4 x i32>, ptr addrspace(5) %"36", align 16 %"30" = extractelement <4 x i32> %"42", i8 3 store i32 %"30", ptr addrspace(5) %"37", align 4 - %"44" = load i64, ptr addrspace(5) %"35", align 4 + %"44" = load i64, ptr addrspace(5) %"35", align 8 %"45" = load i32, ptr addrspace(5) %"37", align 4 %"49" = inttoptr i64 %"44" to ptr store i32 %"45", ptr %"49", align 4 diff --git a/ptx/src/test/ll/vector_extract.ll b/ptx/src/test/ll/vector_extract.ll index 9c615ca..ad3e470 100644 --- a/ptx/src/test/ll/vector_extract.ll +++ b/ptx/src/test/ll/vector_extract.ll @@ -12,11 +12,11 @@ define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"40", ptr br label %"39" "39": ; preds = %1 - %"49" = load i64, ptr addrspace(4) %"40", align 4 - store i64 %"49", ptr addrspace(5) %"42", align 4 - %"50" = load i64, ptr addrspace(4) %"41", align 4 - store i64 %"50", ptr addrspace(5) %"43", align 4 - %"51" = load i64, ptr addrspace(5) %"42", align 4 + %"49" = load i64, ptr addrspace(4) %"40", align 8 + store i64 %"49", ptr addrspace(5) %"42", align 8 + %"50" = load i64, ptr addrspace(4) %"41", align 8 + store i64 %"50", ptr addrspace(5) %"43", align 8 + %"51" = load i64, ptr addrspace(5) %"42", align 8 %"79" = inttoptr i64 %"51" to ptr addrspace(1) %"33" = load <4 x i8>, ptr addrspace(1) %"79", align 4 %"80" = extractelement <4 x i8> %"33", i8 0 @@ -77,7 +77,7 @@ define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"40", ptr %9 = insertelement <4 x i8> %8, i8 %"85", i8 1 %10 = insertelement <4 x i8> %9, i8 %"86", i8 2 %"38" = insertelement <4 x i8> %10, i8 %"87", i8 3 - %"78" = load i64, ptr addrspace(5) %"43", align 4 + %"78" = load i64, ptr addrspace(5) %"43", align 8 %"88" = inttoptr i64 %"78" to ptr addrspace(1) store <4 x i8> %"38", ptr addrspace(1) %"88", align 4 ret void diff --git a/ptx/src/test/ll/warp_sz.ll b/ptx/src/test/ll/warp_sz.ll index aac6b34..c0c0f86 100644 --- a/ptx/src/test/ll/warp_sz.ll +++ b/ptx/src/test/ll/warp_sz.ll @@ -6,9 +6,9 @@ define amdgpu_kernel void @warp_sz(ptr addrspace(4) byref(i64) %"29", ptr addrsp br label %"28" "28": ; preds = %1 - %"32" = load i64, ptr addrspace(4) %"30", align 4 - store i64 %"32", ptr addrspace(5) %"31", align 4 - %"33" = load i64, ptr addrspace(5) %"31", align 4 + %"32" = load i64, ptr addrspace(4) %"30", align 8 + store i64 %"32", ptr addrspace(5) %"31", align 8 + %"33" = load i64, ptr addrspace(5) %"31", align 8 %"34" = inttoptr i64 %"33" to ptr store i8 32, ptr %"34", align 1 ret void diff --git a/ptx/src/test/ll/xor.ll b/ptx/src/test/ll/xor.ll index 6f9633d..a8406ff 100644 --- a/ptx/src/test/ll/xor.ll +++ b/ptx/src/test/ll/xor.ll @@ -9,15 +9,15 @@ define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"33", ptr addrspace( br label %"32" "32": ; preds = %1 - %"39" = load i64, ptr addrspace(4) %"33", align 4 - store i64 %"39", ptr addrspace(5) %"35", align 4 - %"40" = load i64, ptr addrspace(4) %"34", align 4 - store i64 %"40", ptr addrspace(5) %"36", align 4 - %"42" = load i64, ptr addrspace(5) %"35", align 4 + %"39" = load i64, ptr addrspace(4) %"33", align 8 + store i64 %"39", ptr addrspace(5) %"35", align 8 + %"40" = load i64, ptr addrspace(4) %"34", align 8 + store i64 %"40", ptr addrspace(5) %"36", align 8 + %"42" = load i64, ptr addrspace(5) %"35", align 8 %"50" = inttoptr i64 %"42" to ptr %"41" = load i32, ptr %"50", align 4 store i32 %"41", ptr addrspace(5) %"37", align 4 - %"43" = load i64, ptr addrspace(5) %"35", align 4 + %"43" = load i64, ptr addrspace(5) %"35", align 8 %"51" = inttoptr i64 %"43" to ptr %"31" = getelementptr inbounds i8, ptr %"51", i64 4 %"44" = load i32, ptr %"31", align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"33", ptr addrspace( %"47" = load i32, ptr addrspace(5) %"38", align 4 %"45" = xor i32 %"46", %"47" store i32 %"45", ptr addrspace(5) %"37", align 4 - %"48" = load i64, ptr addrspace(5) %"36", align 4 + %"48" = load i64, ptr addrspace(5) %"36", align 8 %"49" = load i32, ptr addrspace(5) %"37", align 4 %"52" = inttoptr i64 %"48" to ptr store i32 %"49", ptr %"52", align 4