Merge commit '119b635b9dffccc2de699b188897d8077529b0d6' into inst_fixes

This commit is contained in:
Andrzej Janik
2025-07-23 22:17:53 +00:00
133 changed files with 1516 additions and 1336 deletions

View File

@ -178,11 +178,14 @@ pub fn compile_bitcode(
comgr: &Comgr,
gcn_arch: &CStr,
main_buffer: &[u8],
attributes_buffer: &[u8],
ptx_impl: &[u8],
) -> Result<Vec<u8>, Error> {
let bitcode_data_set = DataSet::new(comgr)?;
let main_bitcode_data = Data::new(comgr, DataKind::Bc, c"zluda.bc", main_buffer)?;
bitcode_data_set.add(&main_bitcode_data)?;
let attributes_bitcode_data = Data::new(comgr, DataKind::Bc, c"attributes.bc", attributes_buffer)?;
bitcode_data_set.add(&attributes_bitcode_data)?;
let stdlib_bitcode_data = Data::new(comgr, DataKind::Bc, c"ptx_impl.bc", ptx_impl)?;
bitcode_data_set.add(&stdlib_bitcode_data)?;
let linking_info = ActionInfo::new(comgr)?;

View File

@ -9,6 +9,8 @@
#include <hip/amd_detail/amd_device_functions.h>
#define FUNC(NAME) __device__ __attribute__((retain)) __zluda_ptx_impl_##NAME
#define ATTR(NAME) __ZLUDA_PTX_IMPL_ATTRIBUTE_##NAME
#define DECLARE_ATTR(TYPE, NAME) extern const TYPE ATTR(NAME) __device__
extern "C"
{
@ -223,6 +225,29 @@ extern "C"
SHFL_SYNC_IMPL(bfly, self ^ delta, >);
SHFL_SYNC_IMPL(idx, (delta & ~section_mask) | subsection, >);
DECLARE_ATTR(uint32_t, CLOCK_RATE);
void FUNC(nanosleep_u32)(uint32_t nanoseconds) {
// clock_rate is in kHz
uint64_t cycles_per_ns = ATTR(CLOCK_RATE) / 1000000;
uint64_t cycles = nanoseconds * cycles_per_ns;
// Avoid small sleep values resulting in s_sleep 0
cycles += 63;
// s_sleep N sleeps for 64 * N cycles
uint64_t sleep_amount = cycles / 64;
// The argument to s_sleep must be a constant
for (size_t i = 0; i < sleep_amount >> 4; i++)
__builtin_amdgcn_s_sleep(16);
if (sleep_amount & 8U)
__builtin_amdgcn_s_sleep(8);
if (sleep_amount & 4U)
__builtin_amdgcn_s_sleep(4);
if (sleep_amount & 2U)
__builtin_amdgcn_s_sleep(2);
if (sleep_amount & 1U)
__builtin_amdgcn_s_sleep(1);
}
void FUNC(__assertfail)(uint64_t message,
uint64_t file,
uint32_t line,

View File

@ -3,4 +3,5 @@ pub(crate) mod pass;
mod test;
pub use pass::to_llvm_module;
pub use pass::Attributes;

View File

@ -138,10 +138,7 @@ pub(crate) fn default_implicit_conversion(
}
}
if instruction_space != operand_space {
default_implicit_conversion_space(
(operand_space, operand_type),
(instruction_space, instruction_type),
)
default_implicit_conversion_space((operand_space, operand_type), instruction_space)
} else if instruction_type != operand_type {
default_implicit_conversion_type(instruction_space, operand_type, instruction_type)
} else {
@ -167,7 +164,7 @@ fn is_addressable(this: ast::StateSpace) -> bool {
// Space is different
fn default_implicit_conversion_space(
(operand_space, operand_type): (ast::StateSpace, &ast::Type),
(instruction_space, instruction_type): (ast::StateSpace, &ast::Type),
instruction_space: ast::StateSpace,
) -> Result<Option<ConversionKind>, TranslateError> {
if (instruction_space == ast::StateSpace::Generic && coerces_to_generic(operand_space))
|| (operand_space == ast::StateSpace::Generic && coerces_to_generic(instruction_space))
@ -175,15 +172,6 @@ fn default_implicit_conversion_space(
Ok(Some(ConversionKind::PtrToPtr))
} else if operand_space == ast::StateSpace::Reg {
match operand_type {
ast::Type::Pointer(operand_ptr_type, operand_ptr_space)
if *operand_ptr_space == instruction_space =>
{
if instruction_type != &ast::Type::Scalar(*operand_ptr_type) {
Ok(Some(ConversionKind::PtrToPtr))
} else {
Ok(None)
}
}
// TODO: 32 bit
ast::Type::Scalar(ast::ScalarType::B64)
| ast::Type::Scalar(ast::ScalarType::U64)
@ -205,19 +193,6 @@ fn default_implicit_conversion_space(
},
_ => Err(error_mismatched_type()),
}
} else if instruction_space == ast::StateSpace::Reg {
match instruction_type {
ast::Type::Pointer(instruction_ptr_type, instruction_ptr_space)
if operand_space == *instruction_ptr_space =>
{
if operand_type != &ast::Type::Scalar(*instruction_ptr_type) {
Ok(Some(ConversionKind::PtrToPtr))
} else {
Ok(None)
}
}
_ => Err(error_mismatched_type()),
}
} else {
Err(error_mismatched_type())
}

View File

@ -152,6 +152,7 @@ fn run_instruction<'input>(
..
}
| ast::Instruction::Mul24 { .. }
| ast::Instruction::Nanosleep { .. }
| ast::Instruction::Neg { .. }
| ast::Instruction::Not { .. }
| ast::Instruction::Or { .. }

View File

@ -1827,6 +1827,7 @@ fn get_modes<T: ast::Operand>(inst: &ast::Instruction<T>) -> InstructionModes {
| ast::Instruction::Cvta { .. }
| ast::Instruction::Atom { .. }
| ast::Instruction::Mul24 { .. }
| ast::Instruction::Nanosleep { .. }
| ast::Instruction::AtomCas { .. } => InstructionModes::none(),
ast::Instruction::Add {
data: ast::ArithDetails::Integer(_),

View File

@ -0,0 +1,34 @@
use std::ffi::CStr;
use super::*;
use super::super::*;
use llvm_zluda::{core::*};
pub(crate) fn run(context: &Context, attributes: Attributes) -> Result<llvm::Module, TranslateError> {
let module = llvm::Module::new(context, LLVM_UNNAMED);
emit_attribute(context, &module, "clock_rate", attributes.clock_rate)?;
if let Err(err) = module.verify() {
panic!("{:?}", err);
}
Ok(module)
}
fn emit_attribute(context: &Context, module: &llvm::Module, name: &str, attribute: u32) -> Result<(), TranslateError> {
let name = format!("{}attribute_{}\0", ZLUDA_PTX_PREFIX, name).to_ascii_uppercase();
let name = unsafe { CStr::from_bytes_with_nul_unchecked(name.as_bytes()) };
let attribute_type = get_scalar_type(context.get(), ast::ScalarType::U32);
let global = unsafe {
LLVMAddGlobalInAddressSpace(
module.get(),
attribute_type,
name.as_ptr(),
get_state_space(ast::StateSpace::Global)?,
)
};
unsafe { LLVMSetInitializer(global, LLVMConstInt(attribute_type, attribute as u64, 0)) };
unsafe { LLVMSetGlobalConstant(global, 1) };
Ok(())
}

View File

@ -31,94 +31,12 @@ use std::ops::Deref;
use std::{i8, ptr, u64};
use super::*;
use llvm_zluda::analysis::{LLVMVerifierFailureAction, LLVMVerifyModule};
use llvm_zluda::bit_writer::LLVMWriteBitcodeToMemoryBuffer;
use crate::pass::*;
use llvm_zluda::{core::*, *};
use llvm_zluda::{prelude::*, LLVMZludaBuildAtomicRMW};
use llvm_zluda::{LLVMCallConv, LLVMZludaBuildAlloca};
use ptx_parser::Mul24Control;
const LLVM_UNNAMED: &CStr = c"";
// https://llvm.org/docs/AMDGPUUsage.html#address-spaces
const GENERIC_ADDRESS_SPACE: u32 = 0;
const GLOBAL_ADDRESS_SPACE: u32 = 1;
const SHARED_ADDRESS_SPACE: u32 = 3;
const CONSTANT_ADDRESS_SPACE: u32 = 4;
const PRIVATE_ADDRESS_SPACE: u32 = 5;
struct Context(LLVMContextRef);
impl Context {
fn new() -> Self {
Self(unsafe { LLVMContextCreate() })
}
fn get(&self) -> LLVMContextRef {
self.0
}
}
impl Drop for Context {
fn drop(&mut self) {
unsafe {
LLVMContextDispose(self.0);
}
}
}
pub struct Module(LLVMModuleRef, Context);
impl Module {
fn new(ctx: Context, name: &CStr) -> Self {
Self(
unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) },
ctx,
)
}
fn get(&self) -> LLVMModuleRef {
self.0
}
fn context(&self) -> &Context {
&self.1
}
fn verify(&self) -> Result<(), Message> {
let mut err = ptr::null_mut();
let error = unsafe {
LLVMVerifyModule(
self.get(),
LLVMVerifierFailureAction::LLVMReturnStatusAction,
&mut err,
)
};
if error == 1 && err != ptr::null_mut() {
Err(Message(unsafe { CStr::from_ptr(err) }))
} else {
Ok(())
}
}
pub fn write_bitcode_to_memory(&self) -> MemoryBuffer {
let memory_buffer = unsafe { LLVMWriteBitcodeToMemoryBuffer(self.get()) };
MemoryBuffer(memory_buffer)
}
pub fn print_module_to_string(&self) -> Message {
let asm = unsafe { LLVMPrintModuleToString(self.get()) };
Message(unsafe { CStr::from_ptr(asm) })
}
}
impl Drop for Module {
fn drop(&mut self) {
unsafe {
LLVMDisposeModule(self.0);
}
}
}
struct Builder(LLVMBuilderRef);
impl Builder {
@ -143,55 +61,13 @@ impl Drop for Builder {
}
}
pub struct Message(&'static CStr);
impl Drop for Message {
fn drop(&mut self) {
unsafe {
LLVMDisposeMessage(self.0.as_ptr().cast_mut());
}
}
}
impl std::fmt::Debug for Message {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Debug::fmt(&self.0, f)
}
}
impl Message {
pub fn to_str(&self) -> &str {
self.0.to_str().unwrap().trim()
}
}
pub struct MemoryBuffer(LLVMMemoryBufferRef);
impl Drop for MemoryBuffer {
fn drop(&mut self) {
unsafe {
LLVMDisposeMemoryBuffer(self.0);
}
}
}
impl Deref for MemoryBuffer {
type Target = [u8];
fn deref(&self) -> &Self::Target {
let data = unsafe { LLVMGetBufferStart(self.0) };
let len = unsafe { LLVMGetBufferSize(self.0) };
unsafe { std::slice::from_raw_parts(data.cast(), len) }
}
}
pub(super) fn run<'input>(
pub(crate) fn run<'input>(
context: &Context,
id_defs: GlobalStringIdentResolver2<'input>,
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
) -> Result<Module, TranslateError> {
let context = Context::new();
let module = Module::new(context, LLVM_UNNAMED);
let mut emit_ctx = ModuleEmitContext::new(&module, &id_defs);
) -> Result<llvm::Module, TranslateError> {
let module = llvm::Module::new(context, LLVM_UNNAMED);
let mut emit_ctx = ModuleEmitContext::new(context, &module, &id_defs);
for directive in directives {
match directive {
Directive2::Variable(linking, variable) => emit_ctx.emit_global(linking, variable)?,
@ -213,8 +89,7 @@ struct ModuleEmitContext<'a, 'input> {
}
impl<'a, 'input> ModuleEmitContext<'a, 'input> {
fn new(module: &Module, id_defs: &'a GlobalStringIdentResolver2<'input>) -> Self {
let context = module.context();
fn new(context: &Context, module: &llvm::Module, id_defs: &'a GlobalStringIdentResolver2<'input>) -> Self {
ModuleEmitContext {
context: context.get(),
module: module.get(),
@ -647,7 +522,8 @@ impl<'a> MethodEmitContext<'a> {
| ast::Instruction::BarRed { .. }
| ast::Instruction::Bfi { .. }
| ast::Instruction::Activemask { .. }
| ast::Instruction::ShflSync { .. } => return Err(error_unreachable()),
| ast::Instruction::ShflSync { .. }
| ast::Instruction::Nanosleep { .. } => return Err(error_unreachable()),
}
}
@ -662,8 +538,10 @@ impl<'a> MethodEmitContext<'a> {
let builder = self.builder;
let type_ = get_type(self.context, &data.typ)?;
let ptr = self.resolver.value(arguments.src)?;
self.resolver.with_result(arguments.dst, |dst| unsafe {
LLVMBuildLoad2(builder, type_, ptr, dst)
self.resolver.with_result(arguments.dst, |dst| {
let load = unsafe { LLVMBuildLoad2(builder, type_, ptr, dst) };
unsafe { LLVMSetAlignment(load, data.typ.layout().align() as u32) };
load
});
Ok(())
}
@ -882,7 +760,8 @@ impl<'a> MethodEmitContext<'a> {
if data.qualifier != ast::LdStQualifier::Weak {
todo!()
}
unsafe { LLVMBuildStore(self.builder, value, ptr) };
let store = unsafe { LLVMBuildStore(self.builder, value, ptr) };
unsafe { LLVMSetAlignment(store, data.typ.layout().align() as u32); }
Ok(())
}
@ -1096,8 +975,14 @@ impl<'a> MethodEmitContext<'a> {
.iter()
.map(|(value, type_)| {
let value = self.resolver.value(*value)?;
let type_ = get_type(self.context, type_)?;
Ok(unsafe { LLVMBuildLoad2(self.builder, type_, value, LLVM_UNNAMED.as_ptr()) })
let lowered_type = get_type(self.context, type_)?;
let load = unsafe {
LLVMBuildLoad2(self.builder, lowered_type, value, LLVM_UNNAMED.as_ptr())
};
unsafe {
LLVMSetAlignment(load, type_.layout().align() as u32);
}
Ok(load)
})
.collect::<Result<Vec<_>, _>>()?;
@ -2942,37 +2827,9 @@ fn get_type(context: LLVMContextRef, type_: &ast::Type) -> Result<LLVMTypeRef, T
LLVMArrayType2(result, *dimension as u64)
})
}
ast::Type::Pointer(_, space) => get_pointer_type(context, *space)?,
})
}
fn get_scalar_type(context: LLVMContextRef, type_: ast::ScalarType) -> LLVMTypeRef {
match type_ {
ast::ScalarType::Pred => unsafe { LLVMInt1TypeInContext(context) },
ast::ScalarType::S8 | ast::ScalarType::B8 | ast::ScalarType::U8 => unsafe {
LLVMInt8TypeInContext(context)
},
ast::ScalarType::B16 | ast::ScalarType::U16 | ast::ScalarType::S16 => unsafe {
LLVMInt16TypeInContext(context)
},
ast::ScalarType::S32 | ast::ScalarType::B32 | ast::ScalarType::U32 => unsafe {
LLVMInt32TypeInContext(context)
},
ast::ScalarType::U64 | ast::ScalarType::S64 | ast::ScalarType::B64 => unsafe {
LLVMInt64TypeInContext(context)
},
ast::ScalarType::B128 => unsafe { LLVMInt128TypeInContext(context) },
ast::ScalarType::F16 => unsafe { LLVMHalfTypeInContext(context) },
ast::ScalarType::F32 => unsafe { LLVMFloatTypeInContext(context) },
ast::ScalarType::F64 => unsafe { LLVMDoubleTypeInContext(context) },
ast::ScalarType::BF16 => unsafe { LLVMBFloatTypeInContext(context) },
ast::ScalarType::U16x2 => todo!(),
ast::ScalarType::S16x2 => todo!(),
ast::ScalarType::F16x2 => todo!(),
ast::ScalarType::BF16x2 => todo!(),
}
}
fn get_array_type<'a>(
context: LLVMContextRef,
elem_type: &'a ast::Type,
@ -3025,22 +2882,6 @@ fn get_function_type<'a>(
})
}
fn get_state_space(space: ast::StateSpace) -> Result<u32, TranslateError> {
match space {
ast::StateSpace::Reg => Ok(PRIVATE_ADDRESS_SPACE),
ast::StateSpace::Generic => Ok(GENERIC_ADDRESS_SPACE),
ast::StateSpace::Param => Err(TranslateError::Todo("".to_string())),
ast::StateSpace::ParamEntry => Ok(CONSTANT_ADDRESS_SPACE),
ast::StateSpace::ParamFunc => Err(TranslateError::Todo("".to_string())),
ast::StateSpace::Local => Ok(PRIVATE_ADDRESS_SPACE),
ast::StateSpace::Global => Ok(GLOBAL_ADDRESS_SPACE),
ast::StateSpace::Const => Ok(CONSTANT_ADDRESS_SPACE),
ast::StateSpace::Shared => Ok(SHARED_ADDRESS_SPACE),
ast::StateSpace::SharedCta => Err(TranslateError::Todo("".to_string())),
ast::StateSpace::SharedCluster => Err(TranslateError::Todo("".to_string())),
}
}
struct ResolveIdent {
words: HashMap<SpirvWord, String>,
values: HashMap<SpirvWord, LLVMValueRef>,

173
ptx/src/pass/llvm/mod.rs Normal file
View File

@ -0,0 +1,173 @@
pub(super) mod emit;
pub(super) mod attributes;
use std::ffi::CStr;
use std::ops::Deref;
use std::ptr;
use crate::pass::*;
use llvm_zluda::analysis::{LLVMVerifierFailureAction, LLVMVerifyModule};
use llvm_zluda::bit_writer::LLVMWriteBitcodeToMemoryBuffer;
use llvm_zluda::core::*;
use llvm_zluda::prelude::*;
const LLVM_UNNAMED: &CStr = c"";
// https://llvm.org/docs/AMDGPUUsage.html#address-spaces
const GENERIC_ADDRESS_SPACE: u32 = 0;
const GLOBAL_ADDRESS_SPACE: u32 = 1;
const SHARED_ADDRESS_SPACE: u32 = 3;
const CONSTANT_ADDRESS_SPACE: u32 = 4;
const PRIVATE_ADDRESS_SPACE: u32 = 5;
pub(super) struct Context(LLVMContextRef);
impl Context {
pub fn new() -> Self {
Self(unsafe { LLVMContextCreate() })
}
fn get(&self) -> LLVMContextRef {
self.0
}
}
impl Drop for Context {
fn drop(&mut self) {
unsafe {
LLVMContextDispose(self.0);
}
}
}
pub struct Module(LLVMModuleRef);
impl Module {
fn new(ctx: &Context, name: &CStr) -> Self {
Self(
unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), ctx.get()) },
)
}
fn get(&self) -> LLVMModuleRef {
self.0
}
fn verify(&self) -> Result<(), Message> {
let mut err = ptr::null_mut();
let error = unsafe {
LLVMVerifyModule(
self.get(),
LLVMVerifierFailureAction::LLVMReturnStatusAction,
&mut err,
)
};
if error == 1 && err != ptr::null_mut() {
Err(Message(unsafe { CStr::from_ptr(err) }))
} else {
Ok(())
}
}
pub fn write_bitcode_to_memory(&self) -> MemoryBuffer {
let memory_buffer = unsafe { LLVMWriteBitcodeToMemoryBuffer(self.get()) };
MemoryBuffer(memory_buffer)
}
pub fn print_module_to_string(&self) -> Message {
let asm = unsafe { LLVMPrintModuleToString(self.get()) };
Message(unsafe { CStr::from_ptr(asm) })
}
}
impl Drop for Module {
fn drop(&mut self) {
unsafe {
LLVMDisposeModule(self.0);
}
}
}
pub struct Message(&'static CStr);
impl Drop for Message {
fn drop(&mut self) {
unsafe {
LLVMDisposeMessage(self.0.as_ptr().cast_mut());
}
}
}
impl std::fmt::Debug for Message {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Debug::fmt(&self.0, f)
}
}
impl Message {
pub fn to_str(&self) -> &str {
self.0.to_str().unwrap().trim()
}
}
pub struct MemoryBuffer(LLVMMemoryBufferRef);
impl Drop for MemoryBuffer {
fn drop(&mut self) {
unsafe {
LLVMDisposeMemoryBuffer(self.0);
}
}
}
impl Deref for MemoryBuffer {
type Target = [u8];
fn deref(&self) -> &Self::Target {
let data = unsafe { LLVMGetBufferStart(self.0) };
let len = unsafe { LLVMGetBufferSize(self.0) };
unsafe { std::slice::from_raw_parts(data.cast(), len) }
}
}
fn get_scalar_type(context: LLVMContextRef, type_: ast::ScalarType) -> LLVMTypeRef {
match type_ {
ast::ScalarType::Pred => unsafe { LLVMInt1TypeInContext(context) },
ast::ScalarType::S8 | ast::ScalarType::B8 | ast::ScalarType::U8 => unsafe {
LLVMInt8TypeInContext(context)
},
ast::ScalarType::B16 | ast::ScalarType::U16 | ast::ScalarType::S16 => unsafe {
LLVMInt16TypeInContext(context)
},
ast::ScalarType::S32 | ast::ScalarType::B32 | ast::ScalarType::U32 => unsafe {
LLVMInt32TypeInContext(context)
},
ast::ScalarType::U64 | ast::ScalarType::S64 | ast::ScalarType::B64 => unsafe {
LLVMInt64TypeInContext(context)
},
ast::ScalarType::B128 => unsafe { LLVMInt128TypeInContext(context) },
ast::ScalarType::F16 => unsafe { LLVMHalfTypeInContext(context) },
ast::ScalarType::F32 => unsafe { LLVMFloatTypeInContext(context) },
ast::ScalarType::F64 => unsafe { LLVMDoubleTypeInContext(context) },
ast::ScalarType::BF16 => unsafe { LLVMBFloatTypeInContext(context) },
ast::ScalarType::U16x2 => todo!(),
ast::ScalarType::S16x2 => todo!(),
ast::ScalarType::F16x2 => todo!(),
ast::ScalarType::BF16x2 => todo!(),
}
}
fn get_state_space(space: ast::StateSpace) -> Result<u32, TranslateError> {
match space {
ast::StateSpace::Reg => Ok(PRIVATE_ADDRESS_SPACE),
ast::StateSpace::Generic => Ok(GENERIC_ADDRESS_SPACE),
ast::StateSpace::Param => Err(TranslateError::Todo("".to_string())),
ast::StateSpace::ParamEntry => Ok(CONSTANT_ADDRESS_SPACE),
ast::StateSpace::ParamFunc => Err(TranslateError::Todo("".to_string())),
ast::StateSpace::Local => Ok(PRIVATE_ADDRESS_SPACE),
ast::StateSpace::Global => Ok(GLOBAL_ADDRESS_SPACE),
ast::StateSpace::Const => Ok(CONSTANT_ADDRESS_SPACE),
ast::StateSpace::Shared => Ok(SHARED_ADDRESS_SPACE),
ast::StateSpace::SharedCta => Err(TranslateError::Todo("".to_string())),
ast::StateSpace::SharedCluster => Err(TranslateError::Todo("".to_string())),
}
}

View File

@ -12,7 +12,6 @@ use strum::IntoEnumIterator;
use strum_macros::EnumIter;
mod deparamize_functions;
pub(crate) mod emit_llvm;
mod expand_operands;
mod fix_special_registers2;
mod hoist_globals;
@ -20,6 +19,7 @@ mod insert_explicit_load_store;
mod insert_implicit_conversions2;
mod insert_post_saturation;
mod instruction_mode_to_global_mode;
mod llvm;
mod normalize_basic_blocks;
mod normalize_identifiers2;
mod normalize_predicates2;
@ -46,7 +46,13 @@ quick_error! {
}
}
pub fn to_llvm_module<'input>(ast: ast::Module<'input>) -> Result<Module, TranslateError> {
/// GPU attributes needed at compile time.
pub struct Attributes {
/// Clock frequency in kHz.
pub clock_rate: u32,
}
pub fn to_llvm_module<'input>(ast: ast::Module<'input>, attributes: Attributes) -> Result<Module, TranslateError> {
let mut flat_resolver = GlobalStringIdentResolver2::<'input>::new(SpirvWord(1));
let mut scoped_resolver = ScopedResolver::new(&mut flat_resolver);
let sreg_map = SpecialRegistersMap2::new(&mut scoped_resolver)?;
@ -65,16 +71,23 @@ pub fn to_llvm_module<'input>(ast: ast::Module<'input>) -> Result<Module, Transl
let directives = insert_implicit_conversions2::run(&mut flat_resolver, directives)?;
let directives = replace_instructions_with_function_calls::run(&mut flat_resolver, directives)?;
let directives = hoist_globals::run(directives)?;
let llvm_ir = emit_llvm::run(flat_resolver, directives)?;
let context = llvm::Context::new();
let llvm_ir = llvm::emit::run(&context, flat_resolver, directives)?;
let attributes_ir = llvm::attributes::run(&context, attributes)?;
Ok(Module {
llvm_ir,
attributes_ir,
kernel_info: HashMap::new(),
_context: context,
})
}
pub struct Module {
pub llvm_ir: emit_llvm::Module,
pub llvm_ir: llvm::Module,
pub attributes_ir: llvm::Module,
pub kernel_info: HashMap<String, KernelInfo>,
_context: llvm::Context,
}
impl Module {

View File

@ -182,6 +182,9 @@ fn run_instruction<'input>(
ptx_parser::Instruction::ShflSync { data, arguments },
)?
}
i @ ptx_parser::Instruction::Nanosleep { .. } => {
to_call(resolver, fn_declarations, "nanosleep_u32".into(), i)?
}
i => i,
})
}

View File

@ -0,0 +1 @@
@__ZLUDA_PTX_IMPL_ATTRIBUTE_CLOCK_RATE = addrspace(1) constant i32 2124000

View File

@ -9,11 +9,11 @@ define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"29", ptr add
br label %"28"
"28": ; preds = %1
%"33" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"33", ptr addrspace(5) %"31", align 4
%"33" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"33", ptr addrspace(5) %"31", align 8
%"34" = call i32 @__zluda_ptx_impl_activemask()
store i32 %"34", ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(5) %"31", align 4
%"35" = load i64, ptr addrspace(5) %"31", align 8
%"36" = load i32, ptr addrspace(5) %"32", align 4
%"37" = inttoptr i64 %"35" to ptr
store i32 %"36", ptr %"37", align 4

View File

@ -9,21 +9,21 @@ define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"32", ptr addrspace(
br label %"31"
"31": ; preds = %1
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(5) %"34", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(5) %"34", align 8
%"46" = inttoptr i64 %"41" to ptr
%"40" = load i64, ptr %"46", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr %"46", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"42" = add i64 %"43", 1
store i64 %"42", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"35", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"42", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"35", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"47" = inttoptr i64 %"44" to ptr
store i64 %"45", ptr %"47", align 4
store i64 %"45", ptr %"47", align 8
ret void
}

View File

@ -9,21 +9,21 @@ define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"32", p
br label %"31"
"31": ; preds = %1
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(5) %"34", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(5) %"34", align 8
%"46" = inttoptr i64 %"41" to ptr addrspace(1)
%"40" = load i64, ptr addrspace(1) %"46", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr addrspace(1) %"46", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"42" = add i64 %"43", 1
store i64 %"42", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"35", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"42", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"35", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"47" = inttoptr i64 %"44" to ptr addrspace(1)
store i64 %"45", ptr addrspace(1) %"47", align 4
store i64 %"45", ptr addrspace(1) %"47", align 8
ret void
}

View File

@ -11,15 +11,15 @@ define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"37", ptr ad
br label %"36"
"36": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"45", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"46", ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"45", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"46", ptr addrspace(5) %"40", align 8
%"48" = load i64, ptr addrspace(5) %"39", align 8
%"61" = inttoptr i64 %"48" to ptr
%"47" = load i32, ptr %"61", align 4
store i32 %"47", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"39", align 8
%"62" = inttoptr i64 %"49" to ptr
%"33" = getelementptr inbounds i8, ptr %"62", i64 4
%"50" = load i32, ptr %"33", align 4
@ -32,11 +32,11 @@ define amdgpu_kernel void @add_s32_sat(ptr addrspace(4) byref(i64) %"37", ptr ad
%"56" = load i32, ptr addrspace(5) %"42", align 4
%"54" = add i32 %"55", %"56"
store i32 %"54", ptr addrspace(5) %"44", align 4
%"57" = load i64, ptr addrspace(5) %"40", align 4
%"57" = load i64, ptr addrspace(5) %"40", align 8
%"58" = load i32, ptr addrspace(5) %"43", align 4
%"63" = inttoptr i64 %"57" to ptr
store i32 %"58", ptr %"63", align 4
%"59" = load i64, ptr addrspace(5) %"40", align 4
%"59" = load i64, ptr addrspace(5) %"40", align 8
%"64" = inttoptr i64 %"59" to ptr
%"35" = getelementptr inbounds i8, ptr %"64", i64 4
%"60" = load i32, ptr addrspace(5) %"44", align 4

View File

@ -9,21 +9,21 @@ define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"32", ptr add
br label %"31"
"31": ; preds = %1
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(5) %"34", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(5) %"34", align 8
%"46" = inttoptr i64 %"41" to ptr
%"40" = load i64, ptr %"46", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr %"46", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"42" = add i64 %"43", 1
store i64 %"42", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"35", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"42", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"35", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"47" = inttoptr i64 %"44" to ptr
store i64 %"45", ptr %"47", align 4
store i64 %"45", ptr %"47", align 8
ret void
}

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load i32, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
%"47" = load i32, ptr addrspace(5) %"38", align 4
%"52" = and i32 %"46", %"47"
store i32 %"52", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load i32, ptr addrspace(5) %"37", align 4
%"55" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"55", align 4

View File

@ -17,46 +17,46 @@ define amdgpu_kernel void @assertfail(ptr addrspace(4) byref(i64) %"86", ptr add
br label %"84"
"84": ; preds = %1
%"92" = load i64, ptr addrspace(4) %"86", align 4
store i64 %"92", ptr addrspace(5) %"88", align 4
%"93" = load i64, ptr addrspace(4) %"87", align 4
store i64 %"93", ptr addrspace(5) %"89", align 4
%"92" = load i64, ptr addrspace(4) %"86", align 8
store i64 %"92", ptr addrspace(5) %"88", align 8
%"93" = load i64, ptr addrspace(4) %"87", align 8
store i64 %"93", ptr addrspace(5) %"89", align 8
store i32 0, ptr addrspace(5) %"94", align 4
%"97" = getelementptr inbounds i8, ptr addrspace(5) %"96", i64 0
%"98" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"98", ptr addrspace(5) %"97", align 4
%"98" = load i64, ptr addrspace(5) %"88", align 8
store i64 %"98", ptr addrspace(5) %"97", align 8
%"100" = getelementptr inbounds i8, ptr addrspace(5) %"99", i64 0
%"101" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"101", ptr addrspace(5) %"100", align 4
%"101" = load i64, ptr addrspace(5) %"88", align 8
store i64 %"101", ptr addrspace(5) %"100", align 8
%"103" = getelementptr inbounds i8, ptr addrspace(5) %"102", i64 0
%"104" = load i32, ptr addrspace(5) %"94", align 4
store i32 %"104", ptr addrspace(5) %"103", align 4
%"106" = getelementptr inbounds i8, ptr addrspace(5) %"105", i64 0
%"107" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"107", ptr addrspace(5) %"106", align 4
%"107" = load i64, ptr addrspace(5) %"88", align 8
store i64 %"107", ptr addrspace(5) %"106", align 8
%"109" = getelementptr inbounds i8, ptr addrspace(5) %"108", i64 0
%"110" = load i64, ptr addrspace(5) %"88", align 4
store i64 %"110", ptr addrspace(5) %"109", align 4
%"74" = load i64, ptr addrspace(5) %"96", align 4
%"75" = load i64, ptr addrspace(5) %"99", align 4
%"110" = load i64, ptr addrspace(5) %"88", align 8
store i64 %"110", ptr addrspace(5) %"109", align 8
%"74" = load i64, ptr addrspace(5) %"96", align 8
%"75" = load i64, ptr addrspace(5) %"99", align 8
%"76" = load i32, ptr addrspace(5) %"102", align 4
%"77" = load i64, ptr addrspace(5) %"105", align 4
%"78" = load i64, ptr addrspace(5) %"108", align 4
%"77" = load i64, ptr addrspace(5) %"105", align 8
%"78" = load i64, ptr addrspace(5) %"108", align 8
call void @__zluda_ptx_impl___assertfail(i64 %"74", i64 %"75", i32 %"76", i64 %"77", i64 %"78")
br label %"85"
"85": ; preds = %"84"
%"112" = load i64, ptr addrspace(5) %"88", align 4
%"112" = load i64, ptr addrspace(5) %"88", align 8
%"122" = inttoptr i64 %"112" to ptr
%"111" = load i64, ptr %"122", align 4
store i64 %"111", ptr addrspace(5) %"90", align 4
%"114" = load i64, ptr addrspace(5) %"90", align 4
%"111" = load i64, ptr %"122", align 8
store i64 %"111", ptr addrspace(5) %"90", align 8
%"114" = load i64, ptr addrspace(5) %"90", align 8
%"113" = add i64 %"114", 1
store i64 %"113", ptr addrspace(5) %"91", align 4
%"115" = load i64, ptr addrspace(5) %"89", align 4
%"116" = load i64, ptr addrspace(5) %"91", align 4
store i64 %"113", ptr addrspace(5) %"91", align 8
%"115" = load i64, ptr addrspace(5) %"89", align 8
%"116" = load i64, ptr addrspace(5) %"91", align 8
%"123" = inttoptr i64 %"115" to ptr
store i64 %"116", ptr %"123", align 4
store i64 %"116", ptr %"123", align 8
ret void
}

View File

@ -11,15 +11,15 @@ define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"36", ptr addrs
br label %"35"
"35": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"43" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"43", ptr addrspace(5) %"39", align 8
%"45" = load i64, ptr addrspace(5) %"38", align 8
%"56" = inttoptr i64 %"45" to ptr
%"44" = load i32, ptr %"56", align 4
store i32 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 8
%"57" = inttoptr i64 %"46" to ptr
%"32" = getelementptr inbounds i8, ptr %"57", i64 4
%"47" = load i32, ptr %"32", align 4
@ -31,11 +31,11 @@ define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"36", ptr addrs
store i32 %2, ptr addrspace(5) %"40", align 4
%"51" = load i32, ptr addrspace(3) @shared_mem, align 4
store i32 %"51", ptr addrspace(5) %"41", align 4
%"52" = load i64, ptr addrspace(5) %"39", align 4
%"52" = load i64, ptr addrspace(5) %"39", align 8
%"53" = load i32, ptr addrspace(5) %"40", align 4
%"61" = inttoptr i64 %"52" to ptr
store i32 %"53", ptr %"61", align 4
%"54" = load i64, ptr addrspace(5) %"39", align 4
%"54" = load i64, ptr addrspace(5) %"39", align 8
%"62" = inttoptr i64 %"54" to ptr
%"34" = getelementptr inbounds i8, ptr %"62", i64 4
%"55" = load i32, ptr addrspace(5) %"41", align 4

View File

@ -11,15 +11,15 @@ define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"36", ptr
br label %"35"
"35": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"43" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"43", ptr addrspace(5) %"39", align 8
%"45" = load i64, ptr addrspace(5) %"38", align 8
%"56" = inttoptr i64 %"45" to ptr
%"44" = load float, ptr %"56", align 4
store float %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 8
%"57" = inttoptr i64 %"46" to ptr
%"32" = getelementptr inbounds i8, ptr %"57", i64 4
%"47" = load float, ptr %"32", align 4
@ -31,11 +31,11 @@ define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"36", ptr
store float %2, ptr addrspace(5) %"40", align 4
%"51" = load float, ptr addrspace(3) @shared_mem, align 4
store float %"51", ptr addrspace(5) %"41", align 4
%"52" = load i64, ptr addrspace(5) %"39", align 4
%"52" = load i64, ptr addrspace(5) %"39", align 8
%"53" = load float, ptr addrspace(5) %"40", align 4
%"61" = inttoptr i64 %"52" to ptr
store float %"53", ptr %"61", align 4
%"54" = load i64, ptr addrspace(5) %"39", align 4
%"54" = load i64, ptr addrspace(5) %"39", align 8
%"62" = inttoptr i64 %"54" to ptr
%"34" = getelementptr inbounds i8, ptr %"62", i64 4
%"55" = load float, ptr addrspace(5) %"41", align 4

View File

@ -9,31 +9,31 @@ define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"38", ptr addrs
br label %"37"
"37": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4
%"44" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"44", ptr addrspace(5) %"40", align 8
%"45" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"45", ptr addrspace(5) %"41", align 8
%"47" = load i64, ptr addrspace(5) %"40", align 8
%"57" = inttoptr i64 %"47" to ptr
%"46" = load i32, ptr %"57", align 4
store i32 %"46", ptr addrspace(5) %"42", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 8
%"58" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"58", i64 4
%"50" = load i32, ptr addrspace(5) %"42", align 4
%2 = cmpxchg ptr %"31", i32 %"50", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
%"59" = extractvalue { i32, i1 } %2, 0
store i32 %"59", ptr addrspace(5) %"42", align 4
%"51" = load i64, ptr addrspace(5) %"40", align 4
%"51" = load i64, ptr addrspace(5) %"40", align 8
%"61" = inttoptr i64 %"51" to ptr
%"34" = getelementptr inbounds i8, ptr %"61", i64 4
%"52" = load i32, ptr %"34", align 4
store i32 %"52", ptr addrspace(5) %"43", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 8
%"54" = load i32, ptr addrspace(5) %"42", align 4
%"62" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"62", align 4
%"55" = load i64, ptr addrspace(5) %"41", align 4
%"55" = load i64, ptr addrspace(5) %"41", align 8
%"63" = inttoptr i64 %"55" to ptr
%"36" = getelementptr inbounds i8, ptr %"63", i64 4
%"56" = load i32, ptr addrspace(5) %"43", align 4

View File

@ -10,32 +10,32 @@ define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"38", ptr addrs
br label %"37"
"37": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"45", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"46", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"45", ptr addrspace(5) %"40", align 8
%"46" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"46", ptr addrspace(5) %"41", align 8
%"48" = load i64, ptr addrspace(5) %"40", align 8
%"59" = inttoptr i64 %"48" to ptr
%2 = atomicrmw uinc_wrap ptr %"59", i32 101 syncscope("agent-one-as") monotonic, align 4
store i32 %2, ptr addrspace(5) %"42", align 4
%"50" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"40", align 8
%"60" = inttoptr i64 %"50" to ptr addrspace(1)
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"60", i32 101 syncscope("agent-one-as") monotonic, align 4
store i32 %3, ptr addrspace(5) %"43", align 4
%"52" = load i64, ptr addrspace(5) %"40", align 4
%"52" = load i64, ptr addrspace(5) %"40", align 8
%"61" = inttoptr i64 %"52" to ptr
%"51" = load i32, ptr %"61", align 4
store i32 %"51", ptr addrspace(5) %"44", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 8
%"54" = load i32, ptr addrspace(5) %"42", align 4
%"62" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"62", align 4
%"55" = load i64, ptr addrspace(5) %"41", align 4
%"55" = load i64, ptr addrspace(5) %"41", align 8
%"63" = inttoptr i64 %"55" to ptr
%"34" = getelementptr inbounds i8, ptr %"63", i64 4
%"56" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"56", ptr %"34", align 4
%"57" = load i64, ptr addrspace(5) %"41", align 4
%"57" = load i64, ptr addrspace(5) %"41", align 8
%"64" = inttoptr i64 %"57" to ptr
%"36" = getelementptr inbounds i8, ptr %"64", i64 8
%"58" = load i32, ptr addrspace(5) %"44", align 4

View File

@ -11,19 +11,19 @@ define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"31", ptr addrs
"30": ; preds = %1
%"37" = load double, ptr addrspace(4) %"31", align 8
store double %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"35", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"35", align 8
%"40" = load double, ptr addrspace(5) %"33", align 8
%"46" = bitcast double %"40" to i64
store i64 %"46", ptr addrspace(5) %"34", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
store i64 %"46", ptr addrspace(5) %"34", align 8
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"47" = inttoptr i64 %"42" to ptr
%"41" = load i64, ptr %"47", align 4
store i64 %"41", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr %"47", align 8
store i64 %"41", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"48" = inttoptr i64 %"43" to ptr
store i64 %"44", ptr %"48", align 4
store i64 %"44", ptr %"48", align 8
ret void
}

View File

@ -18,8 +18,8 @@ define amdgpu_kernel void @bar_red_and_pred(ptr addrspace(4) byref(i64) %"73", p
br label %"70"
"70": ; preds = %1
%"82" = load i64, ptr addrspace(4) %"74", align 4
store i64 %"82", ptr addrspace(5) %"75", align 4
%"82" = load i64, ptr addrspace(4) %"74", align 8
store i64 %"82", ptr addrspace(5) %"75", align 8
%"44" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
br label %"71"
@ -102,15 +102,15 @@ define amdgpu_kernel void @bar_red_and_pred(ptr addrspace(4) byref(i64) %"73", p
"26": ; preds = %"25", %"24"
%"118" = load i32, ptr addrspace(5) %"77", align 4
%"117" = zext i32 %"118" to i64
store i64 %"117", ptr addrspace(5) %"76", align 4
%"120" = load i64, ptr addrspace(5) %"76", align 4
store i64 %"117", ptr addrspace(5) %"76", align 8
%"120" = load i64, ptr addrspace(5) %"76", align 8
%"119" = mul i64 %"120", 4
store i64 %"119", ptr addrspace(5) %"76", align 4
%"122" = load i64, ptr addrspace(5) %"75", align 4
%"123" = load i64, ptr addrspace(5) %"76", align 4
store i64 %"119", ptr addrspace(5) %"76", align 8
%"122" = load i64, ptr addrspace(5) %"75", align 8
%"123" = load i64, ptr addrspace(5) %"76", align 8
%"121" = add i64 %"122", %"123"
store i64 %"121", ptr addrspace(5) %"75", align 4
%"124" = load i64, ptr addrspace(5) %"75", align 4
store i64 %"121", ptr addrspace(5) %"75", align 8
%"124" = load i64, ptr addrspace(5) %"75", align 8
%"125" = load i32, ptr addrspace(5) %"81", align 4
%"126" = inttoptr i64 %"124" to ptr
store i32 %"125", ptr %"126", align 4

View File

@ -12,20 +12,20 @@ define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
br label %"35"
"35": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"43", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"44", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"43", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"44", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(5) %"38", align 8
%"57" = inttoptr i64 %"46" to ptr
%"45" = load i32, ptr %"57", align 4
store i32 %"45", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 8
%"58" = inttoptr i64 %"47" to ptr
%"32" = getelementptr inbounds i8, ptr %"58", i64 4
%"48" = load i32, ptr %"32", align 4
store i32 %"48", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 8
%"59" = inttoptr i64 %"49" to ptr
%"34" = getelementptr inbounds i8, ptr %"59", i64 8
%"50" = load i32, ptr %"34", align 4
@ -35,7 +35,7 @@ define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
%"54" = load i32, ptr addrspace(5) %"42", align 4
%"51" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"52", i32 %"53", i32 %"54")
store i32 %"51", ptr addrspace(5) %"40", align 4
%"55" = load i64, ptr addrspace(5) %"39", align 4
%"55" = load i64, ptr addrspace(5) %"39", align 8
%"56" = load i32, ptr addrspace(5) %"40", align 4
%"60" = inttoptr i64 %"55" to ptr
store i32 %"56", ptr %"60", align 4

View File

@ -13,25 +13,25 @@ define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"39", ptr addrspace(
br label %"38"
"38": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"47", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"48", ptr addrspace(5) %"42", align 4
%"50" = load i64, ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"47", ptr addrspace(5) %"41", align 8
%"48" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"48", ptr addrspace(5) %"42", align 8
%"50" = load i64, ptr addrspace(5) %"41", align 8
%"64" = inttoptr i64 %"50" to ptr
%"49" = load i32, ptr %"64", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 8
%"65" = inttoptr i64 %"51" to ptr
%"33" = getelementptr inbounds i8, ptr %"65", i64 4
%"52" = load i32, ptr %"33", align 4
store i32 %"52", ptr addrspace(5) %"44", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 8
%"66" = inttoptr i64 %"53" to ptr
%"35" = getelementptr inbounds i8, ptr %"66", i64 8
%"54" = load i32, ptr %"35", align 4
store i32 %"54", ptr addrspace(5) %"45", align 4
%"55" = load i64, ptr addrspace(5) %"41", align 4
%"55" = load i64, ptr addrspace(5) %"41", align 8
%"67" = inttoptr i64 %"55" to ptr
%"37" = getelementptr inbounds i8, ptr %"67", i64 12
%"56" = load i32, ptr %"37", align 4
@ -42,7 +42,7 @@ define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"39", ptr addrspace(
%"61" = load i32, ptr addrspace(5) %"46", align 4
%"68" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"58", i32 %"59", i32 %"60", i32 %"61")
store i32 %"68", ptr addrspace(5) %"43", align 4
%"62" = load i64, ptr addrspace(5) %"42", align 4
%"62" = load i64, ptr addrspace(5) %"42", align 8
%"63" = load i32, ptr addrspace(5) %"43", align 4
%"71" = inttoptr i64 %"62" to ptr
store i32 %"63", ptr %"71", align 4

View File

@ -10,24 +10,24 @@ define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"34", ptr addrspac
br label %"33"
"33": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"51" = inttoptr i64 %"43" to ptr
%"42" = load i64, ptr %"51", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"42" = load i64, ptr %"51", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"45" = load i64, ptr addrspace(5) %"38", align 8
%"44" = add i64 %"45", 1
store i64 %"44", ptr addrspace(5) %"39", align 4
%"48" = load i64, ptr addrspace(5) %"46", align 4
store i64 %"44", ptr addrspace(5) %"39", align 8
%"48" = load i64, ptr addrspace(5) %"46", align 8
%"47" = add i64 %"48", 1
store i64 %"47", ptr addrspace(5) %"46", align 4
%"49" = load i64, ptr addrspace(5) %"37", align 4
%"50" = load i64, ptr addrspace(5) %"39", align 4
store i64 %"47", ptr addrspace(5) %"46", align 8
%"49" = load i64, ptr addrspace(5) %"37", align 8
%"50" = load i64, ptr addrspace(5) %"39", align 8
%"52" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"52", align 4
store i64 %"50", ptr %"52", align 8
ret void
}

View File

@ -9,27 +9,27 @@ define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
br label %"35"
"35": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"43" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"43", ptr addrspace(5) %"39", align 8
%"45" = load i64, ptr addrspace(5) %"38", align 8
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
%"44" = load i64, ptr %"50", align 8
store i64 %"44", ptr addrspace(5) %"40", align 8
br label %"10"
"10": ; preds = %"35"
%"47" = load i64, ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 8
%"46" = add i64 %"47", 1
store i64 %"46", ptr addrspace(5) %"41", align 4
store i64 %"46", ptr addrspace(5) %"41", align 8
br label %"12"
"12": ; preds = %"10"
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 8
%"49" = load i64, ptr addrspace(5) %"41", align 8
%"51" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"51", align 4
store i64 %"49", ptr %"51", align 8
ret void
}

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"30", ptr addrspace
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load i32, ptr %"43", align 4
store i32 %"37", ptr addrspace(5) %"34", align 4
%"40" = load i32, ptr addrspace(5) %"34", align 4
%"39" = call i32 @llvm.bitreverse.i32(i32 %"40")
store i32 %"39", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load i32, ptr addrspace(5) %"34", align 4
%"44" = inttoptr i64 %"41" to ptr
store i32 %"42", ptr %"44", align 4

View File

@ -9,17 +9,17 @@ define i64 @incr(i64 %"43") #0 {
br label %"46"
"46": ; preds = %1
store i64 %"43", ptr addrspace(5) %"65", align 4
%"67" = load i64, ptr addrspace(5) %"65", align 4
store i64 %"67", ptr addrspace(5) %"66", align 4
%"69" = load i64, ptr addrspace(5) %"66", align 4
store i64 %"43", ptr addrspace(5) %"65", align 8
%"67" = load i64, ptr addrspace(5) %"65", align 8
store i64 %"67", ptr addrspace(5) %"66", align 8
%"69" = load i64, ptr addrspace(5) %"66", align 8
%"68" = add i64 %"69", 1
store i64 %"68", ptr addrspace(5) %"66", align 4
%"70" = load i64, ptr addrspace(5) %"66", align 4
store i64 %"70", ptr addrspace(5) %"64", align 4
%"71" = load i64, ptr addrspace(5) %"64", align 4
store i64 %"71", ptr addrspace(5) %"63", align 4
%2 = load i64, ptr addrspace(5) %"63", align 4
store i64 %"68", ptr addrspace(5) %"66", align 8
%"70" = load i64, ptr addrspace(5) %"66", align 8
store i64 %"70", ptr addrspace(5) %"64", align 8
%"71" = load i64, ptr addrspace(5) %"64", align 8
store i64 %"71", ptr addrspace(5) %"63", align 8
%2 = load i64, ptr addrspace(5) %"63", align 8
ret i64 %2
}
@ -35,28 +35,28 @@ define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"48", ptr addrspace
br label %"44"
"44": ; preds = %1
%"53" = load i64, ptr addrspace(4) %"48", align 4
store i64 %"53", ptr addrspace(5) %"50", align 4
%"54" = load i64, ptr addrspace(4) %"49", align 4
store i64 %"54", ptr addrspace(5) %"51", align 4
%"56" = load i64, ptr addrspace(5) %"50", align 4
%"53" = load i64, ptr addrspace(4) %"48", align 8
store i64 %"53", ptr addrspace(5) %"50", align 8
%"54" = load i64, ptr addrspace(4) %"49", align 8
store i64 %"54", ptr addrspace(5) %"51", align 8
%"56" = load i64, ptr addrspace(5) %"50", align 8
%"72" = inttoptr i64 %"56" to ptr addrspace(1)
%"55" = load i64, ptr addrspace(1) %"72", align 4
store i64 %"55", ptr addrspace(5) %"52", align 4
%"59" = load i64, ptr addrspace(5) %"52", align 4
store i64 %"59", ptr addrspace(5) %"57", align 4
%"40" = load i64, ptr addrspace(5) %"57", align 4
%"55" = load i64, ptr addrspace(1) %"72", align 8
store i64 %"55", ptr addrspace(5) %"52", align 8
%"59" = load i64, ptr addrspace(5) %"52", align 8
store i64 %"59", ptr addrspace(5) %"57", align 8
%"40" = load i64, ptr addrspace(5) %"57", align 8
%"41" = call i64 @incr(i64 %"40")
br label %"45"
"45": ; preds = %"44"
store i64 %"41", ptr addrspace(5) %"58", align 4
%"60" = load i64, ptr addrspace(5) %"58", align 4
store i64 %"60", ptr addrspace(5) %"52", align 4
%"61" = load i64, ptr addrspace(5) %"51", align 4
%"62" = load i64, ptr addrspace(5) %"52", align 4
store i64 %"41", ptr addrspace(5) %"58", align 8
%"60" = load i64, ptr addrspace(5) %"58", align 8
store i64 %"60", ptr addrspace(5) %"52", align 8
%"61" = load i64, ptr addrspace(5) %"51", align 8
%"62" = load i64, ptr addrspace(5) %"52", align 8
%"75" = inttoptr i64 %"61" to ptr addrspace(1)
store i64 %"62", ptr addrspace(1) %"75", align 4
store i64 %"62", ptr addrspace(1) %"75", align 8
ret void
}

View File

@ -86,25 +86,25 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrs
"84": ; preds = %1
call void @llvm.amdgcn.s.setreg(i32 6145, i32 1)
%"108" = load i64, ptr addrspace(4) %"92", align 4
store i64 %"108", ptr addrspace(5) %"94", align 4
%"109" = load i64, ptr addrspace(4) %"93", align 4
store i64 %"109", ptr addrspace(5) %"95", align 4
%"111" = load i64, ptr addrspace(5) %"94", align 4
%"108" = load i64, ptr addrspace(4) %"92", align 8
store i64 %"108", ptr addrspace(5) %"94", align 8
%"109" = load i64, ptr addrspace(4) %"93", align 8
store i64 %"109", ptr addrspace(5) %"95", align 8
%"111" = load i64, ptr addrspace(5) %"94", align 8
%"154" = inttoptr i64 %"111" to ptr
%"110" = load float, ptr %"154", align 4
store float %"110", ptr addrspace(5) %"96", align 4
%"112" = load i64, ptr addrspace(5) %"94", align 4
%"112" = load i64, ptr addrspace(5) %"94", align 8
%"155" = inttoptr i64 %"112" to ptr
%"59" = getelementptr inbounds i8, ptr %"155", i64 4
%"113" = load float, ptr %"59", align 4
store float %"113", ptr addrspace(5) %"97", align 4
%"114" = load i64, ptr addrspace(5) %"94", align 4
%"114" = load i64, ptr addrspace(5) %"94", align 8
%"156" = inttoptr i64 %"114" to ptr
%"61" = getelementptr inbounds i8, ptr %"156", i64 8
%"115" = load float, ptr %"61", align 4
store float %"115", ptr addrspace(5) %"98", align 4
%"116" = load i64, ptr addrspace(5) %"94", align 4
%"116" = load i64, ptr addrspace(5) %"94", align 8
%"157" = inttoptr i64 %"116" to ptr
%"63" = getelementptr inbounds i8, ptr %"157", i64 12
%"117" = load float, ptr %"63", align 4
@ -122,7 +122,7 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrs
store float %"74", ptr addrspace(5) %"104", align 4
%"120" = load float, ptr addrspace(5) %"104", align 4
store float %"120", ptr addrspace(5) %"100", align 4
%"121" = load i64, ptr addrspace(5) %"95", align 4
%"121" = load i64, ptr addrspace(5) %"95", align 8
%"122" = load float, ptr addrspace(5) %"100", align 4
%"158" = inttoptr i64 %"121" to ptr
store float %"122", ptr %"158", align 4
@ -139,7 +139,7 @@ define amdgpu_kernel void @call_rnd(ptr addrspace(4) byref(i64) %"92", ptr addrs
store float %"77", ptr addrspace(5) %"107", align 4
%"125" = load float, ptr addrspace(5) %"107", align 4
store float %"125", ptr addrspace(5) %"101", align 4
%"126" = load i64, ptr addrspace(5) %"95", align 4
%"126" = load i64, ptr addrspace(5) %"95", align 8
%"159" = inttoptr i64 %"126" to ptr
%"65" = getelementptr inbounds i8, ptr %"159", i64 4
%"127" = load float, ptr addrspace(5) %"101", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"30", ptr addrspace(
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load i32, ptr %"43", align 4
store i32 %"37", ptr addrspace(5) %"34", align 4
%"40" = load i32, ptr addrspace(5) %"34", align 4
%"44" = call i32 @llvm.ctlz.i32(i32 %"40", i1 false)
store i32 %"44", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load i32, ptr addrspace(5) %"34", align 4
%"45" = inttoptr i64 %"41" to ptr
store i32 %"42", ptr %"45", align 4

View File

@ -13,10 +13,10 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"46", ptr addrspac
br label %"45"
"45": ; preds = %1
%"54" = load i64, ptr addrspace(4) %"46", align 4
store i64 %"54", ptr addrspace(5) %"48", align 4
%"55" = load i64, ptr addrspace(4) %"47", align 4
store i64 %"55", ptr addrspace(5) %"49", align 4
%"54" = load i64, ptr addrspace(4) %"46", align 8
store i64 %"54", ptr addrspace(5) %"48", align 8
%"55" = load i64, ptr addrspace(4) %"47", align 8
store i64 %"55", ptr addrspace(5) %"49", align 8
%"56" = load i16, ptr addrspace(4) @constparams, align 2
store i16 %"56", ptr addrspace(5) %"50", align 2
%"57" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
@ -25,21 +25,21 @@ define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"46", ptr addrspac
store i16 %"58", ptr addrspace(5) %"52", align 2
%"59" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
store i16 %"59", ptr addrspace(5) %"53", align 2
%"60" = load i64, ptr addrspace(5) %"49", align 4
%"60" = load i64, ptr addrspace(5) %"49", align 8
%"61" = load i16, ptr addrspace(5) %"50", align 2
%"72" = inttoptr i64 %"60" to ptr
store i16 %"61", ptr %"72", align 2
%"62" = load i64, ptr addrspace(5) %"49", align 4
%"62" = load i64, ptr addrspace(5) %"49", align 8
%"74" = inttoptr i64 %"62" to ptr
%"40" = getelementptr inbounds i8, ptr %"74", i64 2
%"63" = load i16, ptr addrspace(5) %"51", align 2
store i16 %"63", ptr %"40", align 2
%"64" = load i64, ptr addrspace(5) %"49", align 4
%"64" = load i64, ptr addrspace(5) %"49", align 8
%"76" = inttoptr i64 %"64" to ptr
%"42" = getelementptr inbounds i8, ptr %"76", i64 4
%"65" = load i16, ptr addrspace(5) %"52", align 2
store i16 %"65", ptr %"42", align 2
%"66" = load i64, ptr addrspace(5) %"49", align 4
%"66" = load i64, ptr addrspace(5) %"49", align 8
%"78" = inttoptr i64 %"66" to ptr
%"44" = getelementptr inbounds i8, ptr %"78", i64 6
%"67" = load i16, ptr addrspace(5) %"53", align 2

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"31", ptr a
br label %"30"
"30": ; preds = %1
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"37" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"44" = inttoptr i64 %"39" to ptr
%"38" = load float, ptr %"44", align 4
store float %"38", ptr addrspace(5) %"35", align 4
%"41" = load float, ptr addrspace(5) %"35", align 4
%"40" = fmul float %"41", 5.000000e-01
store float %"40", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"43" = load float, ptr addrspace(5) %"35", align 4
%"45" = inttoptr i64 %"42" to ptr
store float %"43", ptr %"45", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"31",
br label %"30"
"30": ; preds = %1
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"37" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"44" = inttoptr i64 %"39" to ptr
%"38" = load i32, ptr %"44", align 4
store i32 %"38", ptr addrspace(5) %"35", align 4
%"41" = load i32, ptr addrspace(5) %"35", align 4
%"40" = mul i32 %"41", -1
store i32 %"40", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"43" = load i32, ptr addrspace(5) %"35", align 4
%"45" = inttoptr i64 %"42" to ptr
store i32 %"43", ptr %"45", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"30", ptr addrspace(
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load float, ptr %"43", align 4
store float %"37", ptr addrspace(5) %"34", align 4
%"40" = load float, ptr addrspace(5) %"34", align 4
%"39" = call afn float @llvm.cos.f32(float %"40")
store float %"39", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load float, ptr addrspace(5) %"34", align 4
%"44" = inttoptr i64 %"41" to ptr
store float %"42", ptr %"44", align 4

View File

@ -9,18 +9,18 @@ define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"31", ptr ad
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"45" = inttoptr i64 %"40" to ptr addrspace(1)
%"39" = load float, ptr addrspace(1) %"45", align 4
store float %"39", ptr addrspace(5) %"35", align 4
%"42" = load float, ptr addrspace(5) %"35", align 4
%"41" = fpext float %"42" to double
store double %"41", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"34", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 8
%"44" = load double, ptr addrspace(5) %"36", align 8
%"46" = inttoptr i64 %"43" to ptr
store double %"44", ptr %"46", align 8

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"35", ptr addrsp
br label %"34"
"34": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(5) %"37", align 8
%"55" = inttoptr i64 %"44" to ptr
%"43" = load float, ptr %"55", align 4
store float %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"56" = inttoptr i64 %"45" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"46" = load float, ptr %"31", align 4
@ -30,11 +30,11 @@ define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"35", ptr addrsp
%3 = call float @llvm.roundeven.f32(float %"50")
%"49" = freeze float %3
store float %"49", ptr addrspace(5) %"40", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 8
%"52" = load float, ptr addrspace(5) %"39", align 4
%"57" = inttoptr i64 %"51" to ptr
store float %"52", ptr %"57", align 4
%"53" = load i64, ptr addrspace(5) %"38", align 4
%"53" = load i64, ptr addrspace(5) %"38", align 8
%"58" = inttoptr i64 %"53" to ptr
%"33" = getelementptr inbounds i8, ptr %"58", i64 4
%"54" = load float, ptr addrspace(5) %"40", align 4

View File

@ -9,11 +9,11 @@ define amdgpu_kernel void @cvt_rni_u16_f32(ptr addrspace(4) byref(i64) %"31", pt
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"45" = inttoptr i64 %"40" to ptr addrspace(1)
%"39" = load float, ptr addrspace(1) %"45", align 4
store float %"39", ptr addrspace(5) %"35", align 4
@ -21,7 +21,7 @@ define amdgpu_kernel void @cvt_rni_u16_f32(ptr addrspace(4) byref(i64) %"31", pt
%2 = call float @llvm.roundeven.f32(float %"42")
%"41" = call i16 @llvm.fptoui.sat.i16.f32(float %2)
store i16 %"41", ptr addrspace(5) %"36", align 2
%"43" = load i64, ptr addrspace(5) %"34", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 8
%"44" = load i16, ptr addrspace(5) %"36", align 2
%"46" = inttoptr i64 %"43" to ptr
store i16 %"44", ptr %"46", align 2

View File

@ -10,15 +10,15 @@ define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"35", ptr addrsp
"34": ; preds = %1
call void @llvm.amdgcn.s.setreg(i32 6145, i32 3)
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(5) %"37", align 8
%"55" = inttoptr i64 %"44" to ptr
%"43" = load float, ptr %"55", align 4
store float %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"56" = inttoptr i64 %"45" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"46" = load float, ptr %"31", align 4
@ -31,11 +31,11 @@ define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"35", ptr addrsp
%3 = call float @llvm.trunc.f32(float %"50")
%"49" = freeze float %3
store float %"49", ptr addrspace(5) %"40", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 8
%"52" = load float, ptr addrspace(5) %"39", align 4
%"57" = inttoptr i64 %"51" to ptr
store float %"52", ptr %"57", align 4
%"53" = load i64, ptr addrspace(5) %"38", align 4
%"53" = load i64, ptr addrspace(5) %"38", align 8
%"58" = inttoptr i64 %"53" to ptr
%"33" = getelementptr inbounds i8, ptr %"58", i64 4
%"54" = load float, ptr addrspace(5) %"40", align 4

View File

@ -9,11 +9,11 @@ define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"31", ptr add
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"45" = inttoptr i64 %"40" to ptr addrspace(1)
%"39" = load i32, ptr addrspace(1) %"45", align 4
store i32 %"39", ptr addrspace(5) %"36", align 4
@ -22,7 +22,7 @@ define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"31", ptr add
%"46" = sext i8 %2 to i16
%"41" = sext i16 %"46" to i32
store i32 %"41", ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 8
%"44" = load i32, ptr addrspace(5) %"35", align 4
%"48" = inttoptr i64 %"43" to ptr
store i32 %"44", ptr %"48", align 4

View File

@ -9,16 +9,16 @@ define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"35", ptr ad
br label %"34"
"34": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(5) %"37", align 8
%"56" = inttoptr i64 %"44" to ptr
%"55" = load float, ptr %"56", align 4
%"43" = bitcast float %"55" to i32
store i32 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"57" = inttoptr i64 %"45" to ptr
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
%"58" = load float, ptr %"31", align 4
@ -36,11 +36,11 @@ define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"35", ptr ad
%5 = fptosi float %4 to i32
%"61" = freeze i32 %5
store i32 %"61", ptr addrspace(5) %"40", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 8
%"52" = load i32, ptr addrspace(5) %"39", align 4
%"63" = inttoptr i64 %"51" to ptr addrspace(1)
store i32 %"52", ptr addrspace(1) %"63", align 4
%"53" = load i64, ptr addrspace(5) %"38", align 4
%"53" = load i64, ptr addrspace(5) %"38", align 8
%"65" = inttoptr i64 %"53" to ptr addrspace(1)
%"33" = getelementptr inbounds i8, ptr addrspace(1) %"65", i64 4
%"54" = load i32, ptr addrspace(5) %"40", align 4

View File

@ -9,21 +9,21 @@ define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"31", ptr ad
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"46" = inttoptr i64 %"40" to ptr
%"45" = load i32, ptr %"46", align 4
store i32 %"45", ptr addrspace(5) %"35", align 4
%"42" = load i32, ptr addrspace(5) %"35", align 4
%"41" = sext i32 %"42" to i64
store i64 %"41", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 4
store i64 %"41", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"34", align 8
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"47" = inttoptr i64 %"43" to ptr
store i64 %"44", ptr %"47", align 4
store i64 %"44", ptr %"47", align 8
ret void
}

View File

@ -10,11 +10,11 @@ define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"32", ptr ad
br label %"31"
"31": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"39", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"40", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"39", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"40", ptr addrspace(5) %"35", align 8
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"49" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"49", align 4
store i32 %"41", ptr addrspace(5) %"36", align 4
@ -24,7 +24,7 @@ define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"32", ptr ad
store i32 %3, ptr addrspace(5) %"37", align 4
%"46" = load i32, ptr addrspace(5) %"37", align 4
store i32 %"46", ptr addrspace(5) %"38", align 4
%"47" = load i64, ptr addrspace(5) %"35", align 4
%"47" = load i64, ptr addrspace(5) %"35", align 8
%"48" = load i32, ptr addrspace(5) %"38", align 4
%"50" = inttoptr i64 %"47" to ptr
store i32 %"48", ptr %"50", align 4

View File

@ -8,23 +8,23 @@ define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"30", ptr addrspace
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%2 = inttoptr i64 %"38" to ptr
%"45" = addrspacecast ptr %2 to ptr addrspace(1)
store ptr addrspace(1) %"45", ptr addrspace(5) %"32", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 8
%3 = inttoptr i64 %"40" to ptr
%"47" = addrspacecast ptr %3 to ptr addrspace(1)
store ptr addrspace(1) %"47", ptr addrspace(5) %"33", align 8
%"42" = load i64, ptr addrspace(5) %"32", align 4
%"42" = load i64, ptr addrspace(5) %"32", align 8
%"49" = inttoptr i64 %"42" to ptr addrspace(1)
%"41" = load float, ptr addrspace(1) %"49", align 4
store float %"41", ptr addrspace(5) %"34", align 4
%"43" = load i64, ptr addrspace(5) %"33", align 4
%"43" = load i64, ptr addrspace(5) %"33", align 8
%"44" = load float, ptr addrspace(5) %"34", align 4
%"50" = inttoptr i64 %"43" to ptr addrspace(1)
store float %"44", ptr addrspace(1) %"50", align 4

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"33", ptr add
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load float, ptr %"50", align 4
store float %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load float, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"33", ptr add
%"47" = load float, ptr addrspace(5) %"38", align 4
%"45" = fdiv arcp afn float %"46", %"47"
store float %"45", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load float, ptr addrspace(5) %"37", align 4
%"52" = inttoptr i64 %"48" to ptr
store float %"49", ptr %"52", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"30", ptr addrspace(
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load float, ptr %"43", align 4
store float %"37", ptr addrspace(5) %"34", align 4
%"40" = load float, ptr addrspace(5) %"34", align 4
%"39" = call float @llvm.amdgcn.exp2.f32(float %"40")
store float %"39", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load float, ptr addrspace(5) %"34", align 4
%"44" = inttoptr i64 %"41" to ptr
store float %"42", ptr %"44", align 4

View File

@ -13,29 +13,29 @@ define amdgpu_kernel void @extern_func(ptr addrspace(4) byref(i64) %"44", ptr ad
br label %"41"
"41": ; preds = %1
%"50" = load i64, ptr addrspace(4) %"44", align 4
store i64 %"50", ptr addrspace(5) %"46", align 4
%"51" = load i64, ptr addrspace(4) %"45", align 4
store i64 %"51", ptr addrspace(5) %"47", align 4
%"53" = load i64, ptr addrspace(5) %"46", align 4
%"50" = load i64, ptr addrspace(4) %"44", align 8
store i64 %"50", ptr addrspace(5) %"46", align 8
%"51" = load i64, ptr addrspace(4) %"45", align 8
store i64 %"51", ptr addrspace(5) %"47", align 8
%"53" = load i64, ptr addrspace(5) %"46", align 8
%"61" = inttoptr i64 %"53" to ptr addrspace(1)
%"52" = load i64, ptr addrspace(1) %"61", align 4
store i64 %"52", ptr addrspace(5) %"48", align 4
%"52" = load i64, ptr addrspace(1) %"61", align 8
store i64 %"52", ptr addrspace(5) %"48", align 8
%"55" = getelementptr inbounds i8, ptr addrspace(5) %"54", i64 0
%"56" = load i64, ptr addrspace(5) %"48", align 4
store i64 %"56", ptr addrspace(5) %"55", align 4
%"39" = load i64, ptr addrspace(5) %"54", align 4
%"56" = load i64, ptr addrspace(5) %"48", align 8
store i64 %"56", ptr addrspace(5) %"55", align 8
%"39" = load i64, ptr addrspace(5) %"54", align 8
%"40" = call [16 x i8] @foobar(i64 %"39")
br label %"42"
"42": ; preds = %"41"
store [16 x i8] %"40", ptr addrspace(5) %"57", align 1
%"58" = load i64, ptr addrspace(5) %"57", align 4
store i64 %"58", ptr addrspace(5) %"49", align 4
%"59" = load i64, ptr addrspace(5) %"47", align 4
%"60" = load i64, ptr addrspace(5) %"49", align 4
%"58" = load i64, ptr addrspace(5) %"57", align 8
store i64 %"58", ptr addrspace(5) %"49", align 8
%"59" = load i64, ptr addrspace(5) %"47", align 8
%"60" = load i64, ptr addrspace(5) %"49", align 8
%"64" = inttoptr i64 %"59" to ptr
store i64 %"60", ptr %"64", align 4
store i64 %"60", ptr %"64", align 8
ret void
}

View File

@ -10,22 +10,22 @@ define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"31", ptr
br label %"30"
"30": ; preds = %1
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"37" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"44" = inttoptr i64 %"39" to ptr addrspace(1)
%"38" = load i64, ptr addrspace(1) %"44", align 4
store i64 %"38", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(5) %"35", align 4
store i64 %"40", ptr addrspace(3) @shared_mem, align 4
%"41" = load i64, ptr addrspace(3) @shared_mem, align 4
store i64 %"41", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"38" = load i64, ptr addrspace(1) %"44", align 8
store i64 %"38", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(5) %"35", align 8
store i64 %"40", ptr addrspace(3) @shared_mem, align 8
%"41" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"41", ptr addrspace(5) %"35", align 8
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"47" = inttoptr i64 %"42" to ptr addrspace(1)
store i64 %"43", ptr addrspace(1) %"47", align 4
store i64 %"43", ptr addrspace(1) %"47", align 8
ret void
}

View File

@ -8,13 +8,13 @@ define void @incr_shared_2_global() #0 {
br label %"33"
"33": ; preds = %1
%"37" = load i64, ptr addrspace(3) @shared_mem, align 4
store i64 %"37", ptr addrspace(5) %"36", align 4
%"39" = load i64, ptr addrspace(5) %"36", align 4
%"37" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"37", ptr addrspace(5) %"36", align 8
%"39" = load i64, ptr addrspace(5) %"36", align 8
%"38" = add i64 %"39", 2
store i64 %"38", ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr addrspace(5) %"36", align 4
store i64 %"40", ptr addrspace(3) @shared_mem, align 4
store i64 %"38", ptr addrspace(5) %"36", align 8
%"40" = load i64, ptr addrspace(5) %"36", align 8
store i64 %"40", ptr addrspace(3) @shared_mem, align 8
ret void
}
@ -28,26 +28,26 @@ define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"41",
br label %"34"
"34": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"46", ptr addrspace(5) %"43", align 4
%"47" = load i64, ptr addrspace(4) %"42", align 4
store i64 %"47", ptr addrspace(5) %"44", align 4
%"49" = load i64, ptr addrspace(5) %"43", align 4
%"46" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"46", ptr addrspace(5) %"43", align 8
%"47" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"47", ptr addrspace(5) %"44", align 8
%"49" = load i64, ptr addrspace(5) %"43", align 8
%"56" = inttoptr i64 %"49" to ptr addrspace(1)
%"48" = load i64, ptr addrspace(1) %"56", align 4
store i64 %"48", ptr addrspace(5) %"45", align 4
%"50" = load i64, ptr addrspace(5) %"45", align 4
store i64 %"50", ptr addrspace(3) @shared_mem, align 4
%"48" = load i64, ptr addrspace(1) %"56", align 8
store i64 %"48", ptr addrspace(5) %"45", align 8
%"50" = load i64, ptr addrspace(5) %"45", align 8
store i64 %"50", ptr addrspace(3) @shared_mem, align 8
call void @incr_shared_2_global()
br label %"35"
"35": ; preds = %"34"
%"51" = load i64, ptr addrspace(3) @shared_mem, align 4
store i64 %"51", ptr addrspace(5) %"45", align 4
%"52" = load i64, ptr addrspace(5) %"44", align 4
%"53" = load i64, ptr addrspace(5) %"45", align 4
%"51" = load i64, ptr addrspace(3) @shared_mem, align 8
store i64 %"51", ptr addrspace(5) %"45", align 8
%"52" = load i64, ptr addrspace(5) %"44", align 8
%"53" = load i64, ptr addrspace(5) %"45", align 8
%"59" = inttoptr i64 %"52" to ptr addrspace(1)
store i64 %"53", ptr addrspace(1) %"59", align 4
store i64 %"53", ptr addrspace(1) %"59", align 8
ret void
}

View File

@ -10,20 +10,20 @@ define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
br label %"35"
"35": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"43", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"44", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"43", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"44", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(5) %"38", align 8
%"57" = inttoptr i64 %"46" to ptr
%"45" = load float, ptr %"57", align 4
store float %"45", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 8
%"58" = inttoptr i64 %"47" to ptr
%"32" = getelementptr inbounds i8, ptr %"58", i64 4
%"48" = load float, ptr %"32", align 4
store float %"48", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 8
%"59" = inttoptr i64 %"49" to ptr
%"34" = getelementptr inbounds i8, ptr %"59", i64 8
%"50" = load float, ptr %"34", align 4
@ -33,7 +33,7 @@ define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"36", ptr addrspace(
%"54" = load float, ptr addrspace(5) %"42", align 4
%"51" = call float @llvm.fma.f32(float %"52", float %"53", float %"54")
store float %"51", ptr addrspace(5) %"40", align 4
%"55" = load i64, ptr addrspace(5) %"39", align 4
%"55" = load i64, ptr addrspace(5) %"39", align 8
%"56" = load float, ptr addrspace(5) %"40", align 4
%"60" = inttoptr i64 %"55" to ptr
store float %"56", ptr %"60", align 4

View File

@ -11,16 +11,16 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"35", ptr addrspace
br label %"34"
"34": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"43", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"44", ptr addrspace(5) %"38", align 4
%"46" = load i64, ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"43", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"44", ptr addrspace(5) %"38", align 8
%"46" = load i64, ptr addrspace(5) %"37", align 8
%"55" = inttoptr i64 %"46" to ptr
%"54" = load i16, ptr %"55", align 2
%"45" = bitcast i16 %"54" to half
store half %"45", ptr addrspace(5) %"39", align 2
%"47" = load i64, ptr addrspace(5) %"37", align 4
%"47" = load i64, ptr addrspace(5) %"37", align 8
%"56" = inttoptr i64 %"47" to ptr
%"33" = getelementptr inbounds i8, ptr %"56", i64 2
%"57" = load i16, ptr %"33", align 2
@ -30,7 +30,7 @@ define amdgpu_kernel void @fmax(ptr addrspace(4) byref(i64) %"35", ptr addrspace
%"51" = load half, ptr addrspace(5) %"39", align 2
%"49" = call half @llvm.maxnum.f16(half %"50", half %"51")
store half %"49", ptr addrspace(5) %"41", align 2
%"52" = load i64, ptr addrspace(5) %"38", align 4
%"52" = load i64, ptr addrspace(5) %"38", align 8
%"53" = load half, ptr addrspace(5) %"41", align 2
%"58" = inttoptr i64 %"52" to ptr
%"59" = bitcast half %"53" to i16

View File

@ -10,14 +10,14 @@ define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"31", ptr a
br label %"30"
"30": ; preds = %1
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 4
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"33", align 8
%"37" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"43" = inttoptr i64 %"39" to ptr addrspace(1)
%"38" = load i32, ptr addrspace(1) %"43", align 4
store i32 %"38", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"34", align 8
%"41" = load i32, ptr addrspace(5) %"35", align 4
%"44" = inttoptr i64 %"40" to ptr addrspace(1)
store i32 %"41", ptr addrspace(1) %"44", align 4

View File

@ -12,11 +12,11 @@ define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"36", ptr ad
br label %"33"
"33": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"43", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"44", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"43", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"44", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(5) %"38", align 8
%"56" = inttoptr i64 %"46" to ptr
%"55" = load i32, ptr %"56", align 4
store i32 %"55", ptr addrspace(5) %"40", align 4
@ -32,7 +32,7 @@ define amdgpu_kernel void @lanemask_lt(ptr addrspace(4) byref(i64) %"36", ptr ad
%"52" = load i32, ptr addrspace(5) %"42", align 4
%"60" = add i32 %"51", %"52"
store i32 %"60", ptr addrspace(5) %"41", align 4
%"53" = load i64, ptr addrspace(5) %"39", align 4
%"53" = load i64, ptr addrspace(5) %"39", align 8
%"54" = load i32, ptr addrspace(5) %"41", align 4
%"63" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"63", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"30", ptr addrspac
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"41" = inttoptr i64 %"38" to ptr
%"37" = load i64, ptr %"41", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 4
%"40" = load i64, ptr addrspace(5) %"34", align 4
%"37" = load i64, ptr %"41", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"40" = load i64, ptr addrspace(5) %"34", align 8
%"42" = inttoptr i64 %"39" to ptr
store i64 %"40", ptr %"42", align 4
store i64 %"40", ptr %"42", align 8
ret void
}

View File

@ -8,19 +8,19 @@ define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"31", ptr
br label %"30"
"30": ; preds = %1
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
store i64 81985529216486895, ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"37" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
store i64 81985529216486895, ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"44" = inttoptr i64 %"40" to ptr addrspace(1)
%"43" = load float, ptr addrspace(1) %"44", align 4
%2 = bitcast float %"43" to i32
%"39" = zext i32 %2 to i64
store i64 %"39", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(5) %"34", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
store i64 %"39", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(5) %"34", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"45" = inttoptr i64 %"41" to ptr addrspace(1)
%3 = trunc i64 %"42" to i32
%"46" = bitcast i32 %3 to float

View File

@ -9,24 +9,24 @@ define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"35", ptr a
br label %"34"
"34": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(5) %"37", align 8
%"51" = inttoptr i64 %"44" to ptr
%"43" = load i32, ptr %"51", align 4
store i32 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"52" = inttoptr i64 %"45" to ptr
%"31" = getelementptr inbounds i8, ptr %"52", i64 4
%"46" = load i32, ptr %"31", align 4
store i32 %"46", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 8
%"48" = load i32, ptr addrspace(5) %"40", align 4
%"53" = inttoptr i64 %"47" to ptr
store i32 %"48", ptr %"53", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 8
%"54" = inttoptr i64 %"49" to ptr
%"33" = getelementptr inbounds i8, ptr %"54", i64 4
%"50" = load i32, ptr addrspace(5) %"39", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"30", ptr addrspace(
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load float, ptr %"43", align 4
store float %"37", ptr addrspace(5) %"34", align 4
%"40" = load float, ptr addrspace(5) %"34", align 4
%"39" = call float @llvm.amdgcn.log.f32(float %"40")
store float %"39", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load float, ptr addrspace(5) %"34", align 4
%"44" = inttoptr i64 %"41" to ptr
store float %"42", ptr %"44", align 4

View File

@ -9,18 +9,18 @@ define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"31", ptr ad
br label %"30"
"30": ; preds = %1
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"37", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"37" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"37", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"42" = inttoptr i64 %"39" to ptr
%"38" = load i64, ptr %"42", align 4
store i64 %"38", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"35", align 4
%"38" = load i64, ptr %"42", align 8
store i64 %"38", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(5) %"34", align 8
%"41" = load i64, ptr addrspace(5) %"35", align 8
%"43" = inttoptr i64 %"40" to ptr
store i64 %"41", ptr %"43", align 4
store i64 %"41", ptr %"43", align 8
ret void
}

View File

@ -11,20 +11,20 @@ define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"37", ptr addrsp
br label %"36"
"36": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"45", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"46", ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"45", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"46", ptr addrspace(5) %"40", align 8
%"48" = load i64, ptr addrspace(5) %"39", align 8
%"59" = inttoptr i64 %"48" to ptr
%"47" = load i32, ptr %"59", align 4
store i32 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"39", align 8
%"60" = inttoptr i64 %"49" to ptr
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
%"50" = load i32, ptr %"33", align 4
store i32 %"50", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"39", align 4
%"51" = load i64, ptr addrspace(5) %"39", align 8
%"61" = inttoptr i64 %"51" to ptr
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
%"52" = load i32, ptr %"35", align 4
@ -35,7 +35,7 @@ define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"37", ptr addrsp
%2 = mul i32 %"54", %"55"
%"53" = add i32 %2, %"56"
store i32 %"53", ptr addrspace(5) %"41", align 4
%"57" = load i64, ptr addrspace(5) %"40", align 4
%"57" = load i64, ptr addrspace(5) %"40", align 8
%"58" = load i32, ptr addrspace(5) %"41", align 4
%"62" = inttoptr i64 %"57" to ptr
store i32 %"58", ptr %"62", align 4

View File

@ -11,36 +11,36 @@ define amdgpu_kernel void @mad_wide(ptr addrspace(4) byref(i64) %"37", ptr addrs
br label %"36"
"36": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"45", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"46", ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"45", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"46", ptr addrspace(5) %"40", align 8
%"48" = load i64, ptr addrspace(5) %"39", align 8
%"59" = inttoptr i64 %"48" to ptr
%"47" = load i32, ptr %"59", align 4
store i32 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"39", align 8
%"60" = inttoptr i64 %"49" to ptr
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
%"50" = load i32, ptr %"33", align 4
store i32 %"50", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"39", align 4
%"51" = load i64, ptr addrspace(5) %"39", align 8
%"61" = inttoptr i64 %"51" to ptr
%"35" = getelementptr inbounds i8, ptr %"61", i64 8
%"52" = load i64, ptr %"35", align 4
store i64 %"52", ptr addrspace(5) %"44", align 4
%"52" = load i64, ptr %"35", align 8
store i64 %"52", ptr addrspace(5) %"44", align 8
%"54" = load i32, ptr addrspace(5) %"42", align 4
%"55" = load i32, ptr addrspace(5) %"43", align 4
%"56" = load i64, ptr addrspace(5) %"44", align 4
%"56" = load i64, ptr addrspace(5) %"44", align 8
%2 = sext i32 %"54" to i64
%3 = sext i32 %"55" to i64
%4 = mul i64 %2, %3
%"53" = add i64 %4, %"56"
store i64 %"53", ptr addrspace(5) %"41", align 4
%"57" = load i64, ptr addrspace(5) %"40", align 4
%"58" = load i64, ptr addrspace(5) %"41", align 4
store i64 %"53", ptr addrspace(5) %"41", align 8
%"57" = load i64, ptr addrspace(5) %"40", align 8
%"58" = load i64, ptr addrspace(5) %"41", align 8
%"62" = inttoptr i64 %"57" to ptr
store i64 %"58", ptr %"62", align 4
store i64 %"58", ptr %"62", align 8
ret void
}

View File

@ -9,24 +9,24 @@ define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"34", pt
br label %"32"
"32": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
br label %"10"
"10": ; preds = %"32"
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"48" = inttoptr i64 %"43" to ptr
%"42" = load i64, ptr %"48", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"42" = load i64, ptr %"48", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"45" = load i64, ptr addrspace(5) %"38", align 8
%"44" = add i64 %"45", 1
store i64 %"44", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"37", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
store i64 %"44", ptr addrspace(5) %"39", align 8
%"46" = load i64, ptr addrspace(5) %"37", align 8
%"47" = load i64, ptr addrspace(5) %"39", align 8
%"49" = inttoptr i64 %"46" to ptr
store i64 %"47", ptr %"49", align 4
store i64 %"47", ptr %"49", align 8
ret void
}

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load i32, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
%"47" = load i32, ptr addrspace(5) %"38", align 4
%"45" = call i32 @llvm.smax.i32(i32 %"46", i32 %"47")
store i32 %"45", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load i32, ptr addrspace(5) %"37", align 4
%"52" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"52", align 4

View File

@ -8,16 +8,16 @@ define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"30", ptr addrspa
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"42" = inttoptr i64 %"38" to ptr
%"41" = load i32, ptr %"42", align 4
store i32 %"41", ptr addrspace(5) %"34", align 4
fence seq_cst
%"39" = load i64, ptr addrspace(5) %"33", align 4
%"39" = load i64, ptr addrspace(5) %"33", align 8
%"40" = load i32, ptr addrspace(5) %"34", align 4
%"43" = inttoptr i64 %"39" to ptr
store i32 %"40", ptr %"43", align 4

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load i32, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
%"47" = load i32, ptr addrspace(5) %"38", align 4
%"45" = call i32 @llvm.smin.i32(i32 %"46", i32 %"47")
store i32 %"45", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load i32, ptr addrspace(5) %"37", align 4
%"52" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"52", align 4

View File

@ -9,20 +9,20 @@ define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"31", ptr addrspace(
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"45" = inttoptr i64 %"40" to ptr
%"39" = load i64, ptr %"45", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
store i64 %"42", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 4
%"39" = load i64, ptr %"45", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
store i64 %"42", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"34", align 8
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"46" = inttoptr i64 %"43" to ptr
store i64 %"44", ptr %"46", align 4
store i64 %"44", ptr %"46", align 8
ret void
}

View File

@ -8,7 +8,7 @@ define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"29", ptr ad
"28": ; preds = %1
%"33" = ptrtoint ptr addrspace(5) %"10" to i64
store i64 %"33", ptr addrspace(5) %"31", align 4
store i64 %"33", ptr addrspace(5) %"31", align 8
ret void
}

View File

@ -10,11 +10,11 @@ define amdgpu_kernel void @mul24_hi_s32(ptr addrspace(4) byref(i64) %"32", ptr a
br label %"31"
"31": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"39", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"40", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"39", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"40", ptr addrspace(5) %"35", align 8
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"36", align 4
@ -29,7 +29,7 @@ define amdgpu_kernel void @mul24_hi_s32(ptr addrspace(4) byref(i64) %"32", ptr a
%5 = shl i32 %3, 16
%"45" = or i32 %4, %5
store i32 %"45", ptr addrspace(5) %"38", align 4
%"48" = load i64, ptr addrspace(5) %"35", align 4
%"48" = load i64, ptr addrspace(5) %"35", align 8
%"49" = load i32, ptr addrspace(5) %"38", align 4
%"51" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"51", align 4

View File

@ -9,11 +9,11 @@ define amdgpu_kernel void @mul24_hi_u32(ptr addrspace(4) byref(i64) %"31", ptr a
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"46" = inttoptr i64 %"40" to ptr
%"39" = load i32, ptr %"46", align 4
store i32 %"39", ptr addrspace(5) %"35", align 4
@ -25,7 +25,7 @@ define amdgpu_kernel void @mul24_hi_u32(ptr addrspace(4) byref(i64) %"31", ptr a
%5 = shl i32 %3, 16
%"41" = or i32 %4, %5
store i32 %"41", ptr addrspace(5) %"36", align 4
%"44" = load i64, ptr addrspace(5) %"34", align 4
%"44" = load i64, ptr addrspace(5) %"34", align 8
%"45" = load i32, ptr addrspace(5) %"36", align 4
%"47" = inttoptr i64 %"44" to ptr
store i32 %"45", ptr %"47", align 4

View File

@ -10,11 +10,11 @@ define amdgpu_kernel void @mul24_lo_s32(ptr addrspace(4) byref(i64) %"32", ptr a
br label %"31"
"31": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"39", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"40", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"39", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"40", ptr addrspace(5) %"35", align 8
%"42" = load i64, ptr addrspace(5) %"34", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"36", align 4
@ -25,7 +25,7 @@ define amdgpu_kernel void @mul24_lo_s32(ptr addrspace(4) byref(i64) %"32", ptr a
%"47" = load i32, ptr addrspace(5) %"36", align 4
%"45" = call i32 @llvm.amdgcn.mul.i24(i32 %"46", i32 %"47")
store i32 %"45", ptr addrspace(5) %"38", align 4
%"48" = load i64, ptr addrspace(5) %"35", align 4
%"48" = load i64, ptr addrspace(5) %"35", align 8
%"49" = load i32, ptr addrspace(5) %"38", align 4
%"51" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"51", align 4

View File

@ -9,11 +9,11 @@ define amdgpu_kernel void @mul24_lo_u32(ptr addrspace(4) byref(i64) %"31", ptr a
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"46" = inttoptr i64 %"40" to ptr
%"39" = load i32, ptr %"46", align 4
store i32 %"39", ptr addrspace(5) %"35", align 4
@ -21,7 +21,7 @@ define amdgpu_kernel void @mul24_lo_u32(ptr addrspace(4) byref(i64) %"31", ptr a
%"43" = load i32, ptr addrspace(5) %"35", align 4
%"41" = call i32 @llvm.amdgcn.mul.u24(i32 %"42", i32 %"43")
store i32 %"41", ptr addrspace(5) %"36", align 4
%"44" = load i64, ptr addrspace(5) %"34", align 4
%"44" = load i64, ptr addrspace(5) %"34", align 8
%"45" = load i32, ptr addrspace(5) %"36", align 4
%"47" = inttoptr i64 %"44" to ptr
store i32 %"45", ptr %"47", align 4

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"33", ptr addrsp
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load float, ptr %"50", align 4
store float %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load float, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"33", ptr addrsp
%"47" = load float, ptr addrspace(5) %"38", align 4
%"45" = fmul float %"46", %"47"
store float %"45", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load float, ptr addrspace(5) %"37", align 4
%"52" = inttoptr i64 %"48" to ptr
store float %"49", ptr %"52", align 4

View File

@ -9,24 +9,24 @@ define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"32", ptr addrspa
br label %"31"
"31": ; preds = %1
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(5) %"34", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(5) %"34", align 8
%"46" = inttoptr i64 %"41" to ptr
%"40" = load i64, ptr %"46", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr %"46", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%2 = zext i64 %"43" to i128
%3 = mul i128 %2, 2
%4 = lshr i128 %3, 64
%"42" = trunc i128 %4 to i64
store i64 %"42", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"35", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"42", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"35", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"47" = inttoptr i64 %"44" to ptr
store i64 %"45", ptr %"47", align 4
store i64 %"45", ptr %"47", align 8
ret void
}

View File

@ -9,21 +9,21 @@ define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"32", ptr addrspa
br label %"31"
"31": ; preds = %1
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(5) %"34", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(5) %"34", align 8
%"46" = inttoptr i64 %"41" to ptr
%"40" = load i64, ptr %"46", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr %"46", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"42" = mul i64 %"43", 2
store i64 %"42", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"35", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"42", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"35", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"47" = inttoptr i64 %"44" to ptr
store i64 %"45", ptr %"47", align 4
store i64 %"45", ptr %"47", align 8
ret void
}

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"33", ptr ad
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load float, ptr %"50", align 4
store float %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load float, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"33", ptr ad
%"47" = load float, ptr addrspace(5) %"38", align 4
%"45" = fmul float %"46", %"47"
store float %"45", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load float, ptr addrspace(5) %"37", align 4
%"52" = inttoptr i64 %"48" to ptr
store float %"49", ptr %"52", align 4

View File

@ -10,15 +10,15 @@ define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"34", ptr addrs
br label %"33"
"33": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"41", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"42", ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"41", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"42", ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"52" = inttoptr i64 %"44" to ptr addrspace(1)
%"43" = load i32, ptr addrspace(1) %"52", align 4
store i32 %"43", ptr addrspace(5) %"38", align 4
%"45" = load i64, ptr addrspace(5) %"36", align 4
%"45" = load i64, ptr addrspace(5) %"36", align 8
%"53" = inttoptr i64 %"45" to ptr addrspace(1)
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 4
%"46" = load i32, ptr addrspace(1) %"32", align 4
@ -28,11 +28,11 @@ define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"34", ptr addrs
%2 = sext i32 %"48" to i64
%3 = sext i32 %"49" to i64
%"47" = mul i64 %2, %3
store i64 %"47", ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"37", align 4
%"51" = load i64, ptr addrspace(5) %"40", align 4
store i64 %"47", ptr addrspace(5) %"40", align 8
%"50" = load i64, ptr addrspace(5) %"37", align 8
%"51" = load i64, ptr addrspace(5) %"40", align 8
%"54" = inttoptr i64 %"50" to ptr
store i64 %"51", ptr %"54", align 4
store i64 %"51", ptr %"54", align 8
ret void
}

View File

@ -30,11 +30,11 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"50", pt
br label %"44"
"44": ; preds = %1
%"57" = load i64, ptr addrspace(4) %"50", align 4
store i64 %"57", ptr addrspace(5) %"52", align 4
%"58" = load i64, ptr addrspace(4) %"51", align 4
store i64 %"58", ptr addrspace(5) %"53", align 4
%"60" = load i64, ptr addrspace(5) %"52", align 4
%"57" = load i64, ptr addrspace(4) %"50", align 8
store i64 %"57", ptr addrspace(5) %"52", align 8
%"58" = load i64, ptr addrspace(4) %"51", align 8
store i64 %"58", ptr addrspace(5) %"53", align 8
%"60" = load i64, ptr addrspace(5) %"52", align 8
%"68" = inttoptr i64 %"60" to ptr
%"59" = load i32, ptr %"68", align 4
store i32 %"59", ptr addrspace(5) %"54", align 4
@ -48,7 +48,7 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"50", pt
br label %"45"
"45": ; preds = %"44"
%"64" = load i64, ptr addrspace(5) %"53", align 4
%"64" = load i64, ptr addrspace(5) %"53", align 8
%"65" = load i32, ptr addrspace(5) %"55", align 4
%"69" = inttoptr i64 %"64" to ptr
store i32 %"65", ptr %"69", align 4
@ -56,7 +56,7 @@ define amdgpu_kernel void @multiple_return(ptr addrspace(4) byref(i64) %"50", pt
br i1 %"66", label %"19", label %"20"
"19": ; preds = %"45"
%"67" = load i64, ptr addrspace(5) %"53", align 4
%"67" = load i64, ptr addrspace(5) %"53", align 8
%"70" = inttoptr i64 %"67" to ptr
%"41" = getelementptr inbounds i8, ptr %"70", i64 4
store i32 123, ptr %"41", align 4

View File

@ -0,0 +1,15 @@
declare void @__zluda_ptx_impl_nanosleep_u32(i32) #0
define amdgpu_kernel void @nanosleep(ptr addrspace(4) byref(i64) %"28", ptr addrspace(4) byref(i64) %"29") #1 {
br label %1
1: ; preds = %0
br label %"27"
"27": ; preds = %1
call void @__zluda_ptx_impl_nanosleep_u32(i32 1)
ret void
}
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"30", ptr addrspace(
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load i32, ptr %"43", align 4
store i32 %"37", ptr addrspace(5) %"34", align 4
%"40" = load i32, ptr addrspace(5) %"34", align 4
%"39" = sub i32 0, %"40"
store i32 %"39", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load i32, ptr addrspace(5) %"34", align 4
%"44" = inttoptr i64 %"41" to ptr
store i32 %"42", ptr %"44", align 4

View File

@ -9,11 +9,11 @@ define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"3
br label %"33"
"33": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(5) %"36", align 8
%"50" = inttoptr i64 %"42" to ptr addrspace(1)
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 8
%"30" = load <2 x i32>, ptr addrspace(1) %"32", align 8
@ -25,7 +25,7 @@ define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"3
%"47" = load i32, ptr addrspace(5) %"39", align 4
%"45" = add i32 %"46", %"47"
store i32 %"45", ptr addrspace(5) %"38", align 4
%"48" = load i64, ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"37", align 8
%"49" = load i32, ptr addrspace(5) %"38", align 4
%"51" = inttoptr i64 %"48" to ptr addrspace(1)
store i32 %"49", ptr addrspace(1) %"51", align 4

View File

@ -9,21 +9,21 @@ define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"31", ptr addrspace(
br label %"30"
"30": ; preds = %1
%"37" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"37", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(4) %"32", align 4
store i64 %"38", ptr addrspace(5) %"34", align 4
%"40" = load i64, ptr addrspace(5) %"33", align 4
%"37" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"37", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(4) %"32", align 8
store i64 %"38", ptr addrspace(5) %"34", align 8
%"40" = load i64, ptr addrspace(5) %"33", align 8
%"45" = inttoptr i64 %"40" to ptr
%"39" = load i64, ptr %"45", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr %"45", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"46" = xor i64 %"42", -1
store i64 %"46", ptr addrspace(5) %"36", align 4
%"43" = load i64, ptr addrspace(5) %"34", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 4
store i64 %"46", ptr addrspace(5) %"36", align 8
%"43" = load i64, ptr addrspace(5) %"34", align 8
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"48" = inttoptr i64 %"43" to ptr
store i64 %"44", ptr %"48", align 4
store i64 %"44", ptr %"48", align 8
ret void
}

View File

@ -11,11 +11,11 @@ define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"35", ptr addrspace
br label %"32"
"32": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"42", ptr addrspace(5) %"38", align 8
%"44" = load i64, ptr addrspace(5) %"37", align 8
%"51" = inttoptr i64 %"44" to ptr
%"43" = load i32, ptr %"51", align 4
store i32 %"43", ptr addrspace(5) %"39", align 4
@ -28,7 +28,7 @@ define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"35", ptr addrspace
%"48" = load i32, ptr addrspace(5) %"40", align 4
%"46" = add i32 %"47", %"48"
store i32 %"46", ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 4
%"49" = load i64, ptr addrspace(5) %"38", align 8
%"50" = load i32, ptr addrspace(5) %"39", align 4
%"52" = inttoptr i64 %"49" to ptr
store i32 %"50", ptr %"52", align 4

View File

@ -9,27 +9,27 @@ define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i64, ptr %"50", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr %"50", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 8
%"44" = load i64, ptr %"31", align 4
store i64 %"44", ptr addrspace(5) %"38", align 4
%"46" = load i64, ptr addrspace(5) %"37", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr %"31", align 8
store i64 %"44", ptr addrspace(5) %"38", align 8
%"46" = load i64, ptr addrspace(5) %"37", align 8
%"47" = load i64, ptr addrspace(5) %"38", align 8
%"52" = or i64 %"46", %"47"
store i64 %"52", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"49" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"52", ptr addrspace(5) %"37", align 8
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load i64, ptr addrspace(5) %"37", align 8
%"55" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"55", align 4
store i64 %"49", ptr %"55", align 8
ret void
}

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"30", ptr addrspace
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load i32, ptr %"43", align 4
store i32 %"37", ptr addrspace(5) %"34", align 4
%"40" = load i32, ptr addrspace(5) %"34", align 4
%"44" = call i32 @llvm.ctpop.i32(i32 %"40")
store i32 %"44", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load i32, ptr addrspace(5) %"34", align 4
%"45" = inttoptr i64 %"41" to ptr
store i32 %"42", ptr %"45", align 4

View File

@ -11,21 +11,21 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrs
br label %"40"
"40": ; preds = %1
%"49" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"49", ptr addrspace(5) %"43", align 4
%"50" = load i64, ptr addrspace(4) %"42", align 4
store i64 %"50", ptr addrspace(5) %"44", align 4
%"52" = load i64, ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"49", ptr addrspace(5) %"43", align 8
%"50" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"50", ptr addrspace(5) %"44", align 8
%"52" = load i64, ptr addrspace(5) %"43", align 8
%"66" = inttoptr i64 %"52" to ptr
%"51" = load i64, ptr %"66", align 4
store i64 %"51", ptr addrspace(5) %"45", align 4
%"53" = load i64, ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr %"66", align 8
store i64 %"51", ptr addrspace(5) %"45", align 8
%"53" = load i64, ptr addrspace(5) %"43", align 8
%"67" = inttoptr i64 %"53" to ptr
%"37" = getelementptr inbounds i8, ptr %"67", i64 8
%"54" = load i64, ptr %"37", align 4
store i64 %"54", ptr addrspace(5) %"46", align 4
%"56" = load i64, ptr addrspace(5) %"45", align 4
%"57" = load i64, ptr addrspace(5) %"46", align 4
%"54" = load i64, ptr %"37", align 8
store i64 %"54", ptr addrspace(5) %"46", align 8
%"56" = load i64, ptr addrspace(5) %"45", align 8
%"57" = load i64, ptr addrspace(5) %"46", align 8
%"55" = icmp ult i64 %"56", %"57"
store i1 %"55", ptr addrspace(5) %"48", align 1
%"59" = load i1, ptr addrspace(5) %"48", align 1
@ -35,7 +35,7 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrs
br i1 %"60", label %"16", label %"17"
"16": ; preds = %"40"
store i64 1, ptr addrspace(5) %"47", align 4
store i64 1, ptr addrspace(5) %"47", align 8
br label %"17"
"17": ; preds = %"16", %"40"
@ -43,14 +43,14 @@ define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrs
br i1 %"62", label %"19", label %"18"
"18": ; preds = %"17"
store i64 2, ptr addrspace(5) %"47", align 4
store i64 2, ptr addrspace(5) %"47", align 8
br label %"19"
"19": ; preds = %"18", %"17"
%"64" = load i64, ptr addrspace(5) %"44", align 4
%"65" = load i64, ptr addrspace(5) %"47", align 4
%"64" = load i64, ptr addrspace(5) %"44", align 8
%"65" = load i64, ptr addrspace(5) %"47", align 8
%"68" = inttoptr i64 %"64" to ptr
store i64 %"65", ptr %"68", align 4
store i64 %"65", ptr %"68", align 8
ret void
}

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"33", ptr addrspace
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load i32, ptr %"31", align 4
@ -28,7 +28,7 @@ define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"33", ptr addrspace
%3 = bitcast i32 %"47" to <4 x i8>
%"52" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
store <4 x i8> %"52", ptr addrspace(5) %"38", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load i32, ptr addrspace(5) %"38", align 4
%"55" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"55", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"30", ptr addrspace(
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load float, ptr %"43", align 4
store float %"37", ptr addrspace(5) %"34", align 4
%"40" = load float, ptr addrspace(5) %"34", align 4
%"39" = call float @llvm.amdgcn.rcp.f32(float %"40")
store float %"39", ptr addrspace(5) %"34", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load float, ptr addrspace(5) %"34", align 4
%"44" = inttoptr i64 %"41" to ptr
store float %"42", ptr %"44", align 4

View File

@ -9,27 +9,27 @@ define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"37", ptr addr
br label %"36"
"36": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"42", ptr addrspace(5) %"39", align 4
%"43" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"43", ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(5) %"39", align 4
%"42" = load i64, ptr addrspace(4) %"37", align 8
store i64 %"42", ptr addrspace(5) %"39", align 8
%"43" = load i64, ptr addrspace(4) %"38", align 8
store i64 %"43", ptr addrspace(5) %"40", align 8
%"45" = load i64, ptr addrspace(5) %"39", align 8
%"51" = inttoptr i64 %"45" to ptr addrspace(1)
%"50" = load i64, ptr addrspace(1) %"51", align 4
store i64 %"50", ptr addrspace(5) %"41", align 4
%"46" = load i64, ptr addrspace(5) %"41", align 4
%"50" = load i64, ptr addrspace(1) %"51", align 8
store i64 %"50", ptr addrspace(5) %"41", align 8
%"46" = load i64, ptr addrspace(5) %"41", align 8
%"31" = add i64 %"46", 1
%"52" = addrspacecast ptr addrspace(5) %"10" to ptr
store i64 %"31", ptr %"52", align 4
store i64 %"31", ptr %"52", align 8
%"54" = addrspacecast ptr addrspace(5) %"10" to ptr
%"33" = getelementptr inbounds i8, ptr %"54", i64 0
%"55" = load i64, ptr %"33", align 4
store i64 %"55", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"55" = load i64, ptr %"33", align 8
store i64 %"55", ptr addrspace(5) %"41", align 8
%"48" = load i64, ptr addrspace(5) %"40", align 8
%"56" = inttoptr i64 %"48" to ptr addrspace(1)
%"35" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 0
%"49" = load i64, ptr addrspace(5) %"41", align 4
store i64 %"49", ptr addrspace(1) %"35", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 8
store i64 %"49", ptr addrspace(1) %"35", align 8
ret void
}

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
br label %"32"
"32": ; preds = %1
%"39" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"39", ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"42" = load i64, ptr addrspace(5) %"35", align 4
%"39" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"39", ptr addrspace(5) %"35", align 8
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"42" = load i64, ptr addrspace(5) %"35", align 8
%"50" = inttoptr i64 %"42" to ptr
%"41" = load i32, ptr %"50", align 4
store i32 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 4
%"43" = load i64, ptr addrspace(5) %"35", align 8
%"51" = inttoptr i64 %"43" to ptr
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
%"44" = load i32, ptr %"31", align 4
@ -26,7 +26,7 @@ define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"33", ptr addrspace(
%"47" = load i32, ptr addrspace(5) %"38", align 4
%"45" = srem i32 %"46", %"47"
store i32 %"45", ptr addrspace(5) %"37", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 4
%"48" = load i64, ptr addrspace(5) %"36", align 8
%"49" = load i32, ptr addrspace(5) %"37", align 4
%"52" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"52", align 4

View File

@ -8,18 +8,18 @@ define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"30", ptr addrspac
br label %"29"
"29": ; preds = %1
%"35" = load i64, ptr addrspace(4) %"30", align 4
store i64 %"35", ptr addrspace(5) %"32", align 4
%"36" = load i64, ptr addrspace(4) %"31", align 4
store i64 %"36", ptr addrspace(5) %"33", align 4
%"38" = load i64, ptr addrspace(5) %"32", align 4
%"35" = load i64, ptr addrspace(4) %"30", align 8
store i64 %"35", ptr addrspace(5) %"32", align 8
%"36" = load i64, ptr addrspace(4) %"31", align 8
store i64 %"36", ptr addrspace(5) %"33", align 8
%"38" = load i64, ptr addrspace(5) %"32", align 8
%"43" = inttoptr i64 %"38" to ptr
%"37" = load double, ptr %"43", align 8
store double %"37", ptr addrspace(5) %"34", align 8
%"40" = load double, ptr addrspace(5) %"34", align 8
%"39" = call double @llvm.amdgcn.rsq.f64(double %"40")
store double %"39", ptr addrspace(5) %"34", align 8
%"41" = load i64, ptr addrspace(5) %"33", align 4
%"41" = load i64, ptr addrspace(5) %"33", align 8
%"42" = load double, ptr addrspace(5) %"34", align 8
%"44" = inttoptr i64 %"41" to ptr
store double %"42", ptr %"44", align 8

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"34", ptr addrspace
br label %"33"
"33": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"51" = inttoptr i64 %"43" to ptr
%"42" = load i16, ptr %"51", align 2
store i16 %"42", ptr addrspace(5) %"38", align 2
%"44" = load i64, ptr addrspace(5) %"36", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"52" = inttoptr i64 %"44" to ptr
%"31" = getelementptr inbounds i8, ptr %"52", i64 2
%"45" = load i16, ptr %"31", align 2
@ -26,7 +26,7 @@ define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"34", ptr addrspace
%"48" = load i16, ptr addrspace(5) %"39", align 2
%"46" = select i1 false, i16 %"47", i16 %"48"
store i16 %"46", ptr addrspace(5) %"38", align 2
%"49" = load i64, ptr addrspace(5) %"37", align 4
%"49" = load i64, ptr addrspace(5) %"37", align 8
%"50" = load i16, ptr addrspace(5) %"38", align 2
%"53" = inttoptr i64 %"49" to ptr
store i16 %"50", ptr %"53", align 2

View File

@ -9,15 +9,15 @@ define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"34", ptr addr
br label %"33"
"33": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"40", ptr addrspace(5) %"36", align 4
%"41" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"41", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(5) %"36", align 4
%"40" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"40", ptr addrspace(5) %"36", align 8
%"41" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"41", ptr addrspace(5) %"37", align 8
%"43" = load i64, ptr addrspace(5) %"36", align 8
%"51" = inttoptr i64 %"43" to ptr
%"42" = load i16, ptr %"51", align 2
store i16 %"42", ptr addrspace(5) %"38", align 2
%"44" = load i64, ptr addrspace(5) %"36", align 4
%"44" = load i64, ptr addrspace(5) %"36", align 8
%"52" = inttoptr i64 %"44" to ptr
%"31" = getelementptr inbounds i8, ptr %"52", i64 2
%"45" = load i16, ptr %"31", align 2
@ -26,7 +26,7 @@ define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"34", ptr addr
%"48" = load i16, ptr addrspace(5) %"39", align 2
%"46" = select i1 true, i16 %"47", i16 %"48"
store i16 %"46", ptr addrspace(5) %"38", align 2
%"49" = load i64, ptr addrspace(5) %"37", align 4
%"49" = load i64, ptr addrspace(5) %"37", align 8
%"50" = load i16, ptr addrspace(5) %"38", align 2
%"53" = inttoptr i64 %"49" to ptr
store i16 %"50", ptr %"53", align 2

View File

@ -11,28 +11,28 @@ define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"41", ptr addrspace
br label %"40"
"40": ; preds = %1
%"49" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"49", ptr addrspace(5) %"43", align 4
%"50" = load i64, ptr addrspace(4) %"42", align 4
store i64 %"50", ptr addrspace(5) %"44", align 4
%"52" = load i64, ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(4) %"41", align 8
store i64 %"49", ptr addrspace(5) %"43", align 8
%"50" = load i64, ptr addrspace(4) %"42", align 8
store i64 %"50", ptr addrspace(5) %"44", align 8
%"52" = load i64, ptr addrspace(5) %"43", align 8
%"64" = inttoptr i64 %"52" to ptr
%"51" = load i64, ptr %"64", align 4
store i64 %"51", ptr addrspace(5) %"45", align 4
%"53" = load i64, ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr %"64", align 8
store i64 %"51", ptr addrspace(5) %"45", align 8
%"53" = load i64, ptr addrspace(5) %"43", align 8
%"65" = inttoptr i64 %"53" to ptr
%"37" = getelementptr inbounds i8, ptr %"65", i64 8
%"54" = load i64, ptr %"37", align 4
store i64 %"54", ptr addrspace(5) %"46", align 4
%"56" = load i64, ptr addrspace(5) %"45", align 4
%"57" = load i64, ptr addrspace(5) %"46", align 4
%"54" = load i64, ptr %"37", align 8
store i64 %"54", ptr addrspace(5) %"46", align 8
%"56" = load i64, ptr addrspace(5) %"45", align 8
%"57" = load i64, ptr addrspace(5) %"46", align 8
%"55" = icmp ult i64 %"56", %"57"
store i1 %"55", ptr addrspace(5) %"48", align 1
%"58" = load i1, ptr addrspace(5) %"48", align 1
br i1 %"58", label %"16", label %"17"
"16": ; preds = %"40"
store i64 1, ptr addrspace(5) %"47", align 4
store i64 1, ptr addrspace(5) %"47", align 8
br label %"17"
"17": ; preds = %"16", %"40"
@ -40,14 +40,14 @@ define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"41", ptr addrspace
br i1 %"60", label %"19", label %"18"
"18": ; preds = %"17"
store i64 2, ptr addrspace(5) %"47", align 4
store i64 2, ptr addrspace(5) %"47", align 8
br label %"19"
"19": ; preds = %"18", %"17"
%"62" = load i64, ptr addrspace(5) %"44", align 4
%"63" = load i64, ptr addrspace(5) %"47", align 4
%"62" = load i64, ptr addrspace(5) %"44", align 8
%"63" = load i64, ptr addrspace(5) %"47", align 8
%"66" = inttoptr i64 %"62" to ptr
store i64 %"63", ptr %"66", align 4
store i64 %"63", ptr %"66", align 8
ret void
}

View File

@ -11,15 +11,15 @@ define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"39", ptr addrsp
br label %"38"
"38": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"47", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"48", ptr addrspace(5) %"42", align 4
%"50" = load i64, ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"47", ptr addrspace(5) %"41", align 8
%"48" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"48", ptr addrspace(5) %"42", align 8
%"50" = load i64, ptr addrspace(5) %"41", align 8
%"64" = inttoptr i64 %"50" to ptr
%"49" = load float, ptr %"64", align 4
store float %"49", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 8
%"65" = inttoptr i64 %"51" to ptr
%"37" = getelementptr inbounds i8, ptr %"65", i64 4
%"52" = load float, ptr %"37", align 4
@ -46,7 +46,7 @@ define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"39", ptr addrsp
br label %"19"
"19": ; preds = %"18", %"17"
%"62" = load i64, ptr addrspace(5) %"42", align 4
%"62" = load i64, ptr addrspace(5) %"42", align 8
%"63" = load float, ptr addrspace(5) %"45", align 4
%"66" = inttoptr i64 %"62" to ptr
store float %"63", ptr %"66", align 4

View File

@ -11,15 +11,15 @@ define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"39", ptr addrs
br label %"38"
"38": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"47", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"48", ptr addrspace(5) %"42", align 4
%"50" = load i64, ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(4) %"39", align 8
store i64 %"47", ptr addrspace(5) %"41", align 8
%"48" = load i64, ptr addrspace(4) %"40", align 8
store i64 %"48", ptr addrspace(5) %"42", align 8
%"50" = load i64, ptr addrspace(5) %"41", align 8
%"64" = inttoptr i64 %"50" to ptr
%"49" = load float, ptr %"64", align 4
store float %"49", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 8
%"65" = inttoptr i64 %"51" to ptr
%"37" = getelementptr inbounds i8, ptr %"65", i64 4
%"52" = load float, ptr %"37", align 4
@ -46,7 +46,7 @@ define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"39", ptr addrs
br label %"19"
"19": ; preds = %"18", %"17"
%"62" = load i64, ptr addrspace(5) %"42", align 4
%"62" = load i64, ptr addrspace(5) %"42", align 8
%"63" = load float, ptr addrspace(5) %"45", align 4
%"66" = inttoptr i64 %"62" to ptr
store float %"63", ptr %"66", align 4

View File

@ -17,45 +17,45 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"82"
"82": ; preds = %1
%"97" = load i64, ptr addrspace(4) %"83", align 4
store i64 %"97", ptr addrspace(5) %"85", align 4
%"98" = load i64, ptr addrspace(4) %"84", align 4
store i64 %"98", ptr addrspace(5) %"86", align 4
%"100" = load i64, ptr addrspace(5) %"85", align 4
%"97" = load i64, ptr addrspace(4) %"83", align 8
store i64 %"97", ptr addrspace(5) %"85", align 8
%"98" = load i64, ptr addrspace(4) %"84", align 8
store i64 %"98", ptr addrspace(5) %"86", align 8
%"100" = load i64, ptr addrspace(5) %"85", align 8
%"151" = inttoptr i64 %"100" to ptr
%"99" = load float, ptr %"151", align 4
store float %"99", ptr addrspace(5) %"87", align 4
%"101" = load i64, ptr addrspace(5) %"85", align 4
%"101" = load i64, ptr addrspace(5) %"85", align 8
%"152" = inttoptr i64 %"101" to ptr
%"55" = getelementptr inbounds i8, ptr %"152", i64 4
%"102" = load float, ptr %"55", align 4
store float %"102", ptr addrspace(5) %"88", align 4
%"103" = load i64, ptr addrspace(5) %"85", align 4
%"103" = load i64, ptr addrspace(5) %"85", align 8
%"153" = inttoptr i64 %"103" to ptr
%"57" = getelementptr inbounds i8, ptr %"153", i64 8
%"104" = load float, ptr %"57", align 4
store float %"104", ptr addrspace(5) %"89", align 4
%"105" = load i64, ptr addrspace(5) %"85", align 4
%"105" = load i64, ptr addrspace(5) %"85", align 8
%"154" = inttoptr i64 %"105" to ptr
%"59" = getelementptr inbounds i8, ptr %"154", i64 12
%"106" = load float, ptr %"59", align 4
store float %"106", ptr addrspace(5) %"90", align 4
%"107" = load i64, ptr addrspace(5) %"85", align 4
%"107" = load i64, ptr addrspace(5) %"85", align 8
%"155" = inttoptr i64 %"107" to ptr
%"61" = getelementptr inbounds i8, ptr %"155", i64 16
%"108" = load float, ptr %"61", align 4
store float %"108", ptr addrspace(5) %"91", align 4
%"109" = load i64, ptr addrspace(5) %"85", align 4
%"109" = load i64, ptr addrspace(5) %"85", align 8
%"156" = inttoptr i64 %"109" to ptr
%"63" = getelementptr inbounds i8, ptr %"156", i64 20
%"110" = load float, ptr %"63", align 4
store float %"110", ptr addrspace(5) %"92", align 4
%"111" = load i64, ptr addrspace(5) %"85", align 4
%"111" = load i64, ptr addrspace(5) %"85", align 8
%"157" = inttoptr i64 %"111" to ptr
%"65" = getelementptr inbounds i8, ptr %"157", i64 24
%"112" = load float, ptr %"65", align 4
store float %"112", ptr addrspace(5) %"93", align 4
%"113" = load i64, ptr addrspace(5) %"85", align 4
%"113" = load i64, ptr addrspace(5) %"85", align 8
%"158" = inttoptr i64 %"113" to ptr
%"67" = getelementptr inbounds i8, ptr %"158", i64 28
%"114" = load float, ptr %"67", align 4
@ -80,7 +80,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"25"
"25": ; preds = %"24", %"23"
%"122" = load i64, ptr addrspace(5) %"86", align 4
%"122" = load i64, ptr addrspace(5) %"86", align 8
%"123" = load i32, ptr addrspace(5) %"95", align 4
%"159" = inttoptr i64 %"122" to ptr
store i32 %"123", ptr %"159", align 4
@ -104,7 +104,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"29"
"29": ; preds = %"28", %"27"
%"131" = load i64, ptr addrspace(5) %"86", align 4
%"131" = load i64, ptr addrspace(5) %"86", align 8
%"160" = inttoptr i64 %"131" to ptr
%"73" = getelementptr inbounds i8, ptr %"160", i64 4
%"132" = load i32, ptr addrspace(5) %"95", align 4
@ -129,7 +129,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"33"
"33": ; preds = %"32", %"31"
%"140" = load i64, ptr addrspace(5) %"86", align 4
%"140" = load i64, ptr addrspace(5) %"86", align 8
%"161" = inttoptr i64 %"140" to ptr
%"77" = getelementptr inbounds i8, ptr %"161", i64 8
%"141" = load i32, ptr addrspace(5) %"95", align 4
@ -154,7 +154,7 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"37"
"37": ; preds = %"36", %"35"
%"149" = load i64, ptr addrspace(5) %"86", align 4
%"149" = load i64, ptr addrspace(5) %"86", align 8
%"162" = inttoptr i64 %"149" to ptr
%"81" = getelementptr inbounds i8, ptr %"162", i64 12
%"150" = load i32, ptr addrspace(5) %"95", align 4

View File

@ -17,45 +17,45 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"82"
"82": ; preds = %1
%"97" = load i64, ptr addrspace(4) %"83", align 4
store i64 %"97", ptr addrspace(5) %"85", align 4
%"98" = load i64, ptr addrspace(4) %"84", align 4
store i64 %"98", ptr addrspace(5) %"86", align 4
%"100" = load i64, ptr addrspace(5) %"85", align 4
%"97" = load i64, ptr addrspace(4) %"83", align 8
store i64 %"97", ptr addrspace(5) %"85", align 8
%"98" = load i64, ptr addrspace(4) %"84", align 8
store i64 %"98", ptr addrspace(5) %"86", align 8
%"100" = load i64, ptr addrspace(5) %"85", align 8
%"151" = inttoptr i64 %"100" to ptr
%"99" = load float, ptr %"151", align 4
store float %"99", ptr addrspace(5) %"87", align 4
%"101" = load i64, ptr addrspace(5) %"85", align 4
%"101" = load i64, ptr addrspace(5) %"85", align 8
%"152" = inttoptr i64 %"101" to ptr
%"55" = getelementptr inbounds i8, ptr %"152", i64 4
%"102" = load float, ptr %"55", align 4
store float %"102", ptr addrspace(5) %"88", align 4
%"103" = load i64, ptr addrspace(5) %"85", align 4
%"103" = load i64, ptr addrspace(5) %"85", align 8
%"153" = inttoptr i64 %"103" to ptr
%"57" = getelementptr inbounds i8, ptr %"153", i64 8
%"104" = load float, ptr %"57", align 4
store float %"104", ptr addrspace(5) %"89", align 4
%"105" = load i64, ptr addrspace(5) %"85", align 4
%"105" = load i64, ptr addrspace(5) %"85", align 8
%"154" = inttoptr i64 %"105" to ptr
%"59" = getelementptr inbounds i8, ptr %"154", i64 12
%"106" = load float, ptr %"59", align 4
store float %"106", ptr addrspace(5) %"90", align 4
%"107" = load i64, ptr addrspace(5) %"85", align 4
%"107" = load i64, ptr addrspace(5) %"85", align 8
%"155" = inttoptr i64 %"107" to ptr
%"61" = getelementptr inbounds i8, ptr %"155", i64 16
%"108" = load float, ptr %"61", align 4
store float %"108", ptr addrspace(5) %"91", align 4
%"109" = load i64, ptr addrspace(5) %"85", align 4
%"109" = load i64, ptr addrspace(5) %"85", align 8
%"156" = inttoptr i64 %"109" to ptr
%"63" = getelementptr inbounds i8, ptr %"156", i64 20
%"110" = load float, ptr %"63", align 4
store float %"110", ptr addrspace(5) %"92", align 4
%"111" = load i64, ptr addrspace(5) %"85", align 4
%"111" = load i64, ptr addrspace(5) %"85", align 8
%"157" = inttoptr i64 %"111" to ptr
%"65" = getelementptr inbounds i8, ptr %"157", i64 24
%"112" = load float, ptr %"65", align 4
store float %"112", ptr addrspace(5) %"93", align 4
%"113" = load i64, ptr addrspace(5) %"85", align 4
%"113" = load i64, ptr addrspace(5) %"85", align 8
%"158" = inttoptr i64 %"113" to ptr
%"67" = getelementptr inbounds i8, ptr %"158", i64 28
%"114" = load float, ptr %"67", align 4
@ -80,7 +80,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"25"
"25": ; preds = %"24", %"23"
%"122" = load i64, ptr addrspace(5) %"86", align 4
%"122" = load i64, ptr addrspace(5) %"86", align 8
%"123" = load i32, ptr addrspace(5) %"95", align 4
%"159" = inttoptr i64 %"122" to ptr
store i32 %"123", ptr %"159", align 4
@ -104,7 +104,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"29"
"29": ; preds = %"28", %"27"
%"131" = load i64, ptr addrspace(5) %"86", align 4
%"131" = load i64, ptr addrspace(5) %"86", align 8
%"160" = inttoptr i64 %"131" to ptr
%"73" = getelementptr inbounds i8, ptr %"160", i64 4
%"132" = load i32, ptr addrspace(5) %"95", align 4
@ -129,7 +129,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"33"
"33": ; preds = %"32", %"31"
%"140" = load i64, ptr addrspace(5) %"86", align 4
%"140" = load i64, ptr addrspace(5) %"86", align 8
%"161" = inttoptr i64 %"140" to ptr
%"77" = getelementptr inbounds i8, ptr %"161", i64 8
%"141" = load i32, ptr addrspace(5) %"95", align 4
@ -154,7 +154,7 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrs
br label %"37"
"37": ; preds = %"36", %"35"
%"149" = load i64, ptr addrspace(5) %"86", align 4
%"149" = load i64, ptr addrspace(5) %"86", align 8
%"162" = inttoptr i64 %"149" to ptr
%"81" = getelementptr inbounds i8, ptr %"162", i64 12
%"150" = load i32, ptr addrspace(5) %"95", align 4

View File

@ -12,28 +12,28 @@ define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"35", ptr
br label %"34"
"34": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"42", ptr addrspace(5) %"37", align 4
%"43" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"43", ptr addrspace(5) %"38", align 4
%"42" = load i64, ptr addrspace(4) %"35", align 8
store i64 %"42", ptr addrspace(5) %"37", align 8
%"43" = load i64, ptr addrspace(4) %"36", align 8
store i64 %"43", ptr addrspace(5) %"38", align 8
store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"37", align 4
%"46" = load i64, ptr addrspace(5) %"37", align 8
%"54" = inttoptr i64 %"46" to ptr addrspace(1)
%"45" = load i64, ptr addrspace(1) %"54", align 4
store i64 %"45", ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(1) %"54", align 8
store i64 %"45", ptr addrspace(5) %"40", align 8
%"47" = load i32, ptr addrspace(5) %"39", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 8
%"55" = inttoptr i32 %"47" to ptr addrspace(3)
store i64 %"48", ptr addrspace(3) %"55", align 4
store i64 %"48", ptr addrspace(3) %"55", align 8
%"49" = load i32, ptr addrspace(5) %"39", align 4
%"56" = inttoptr i32 %"49" to ptr addrspace(3)
%"33" = getelementptr inbounds i8, ptr addrspace(3) %"56", i64 0
%"50" = load i64, ptr addrspace(3) %"33", align 4
store i64 %"50", ptr addrspace(5) %"41", align 4
%"51" = load i64, ptr addrspace(5) %"38", align 4
%"52" = load i64, ptr addrspace(5) %"41", align 4
%"50" = load i64, ptr addrspace(3) %"33", align 8
store i64 %"50", ptr addrspace(5) %"41", align 8
%"51" = load i64, ptr addrspace(5) %"38", align 8
%"52" = load i64, ptr addrspace(5) %"41", align 8
%"57" = inttoptr i64 %"51" to ptr addrspace(1)
store i64 %"52", ptr addrspace(1) %"57", align 4
store i64 %"52", ptr addrspace(1) %"57", align 8
ret void
}

View File

@ -12,27 +12,27 @@ define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %
br label %"32"
"32": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"33", align 4
store i64 %"40", ptr addrspace(5) %"35", align 4
%"41" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"41", ptr addrspace(5) %"36", align 4
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"35", align 4
%"40" = load i64, ptr addrspace(4) %"33", align 8
store i64 %"40", ptr addrspace(5) %"35", align 8
%"41" = load i64, ptr addrspace(4) %"34", align 8
store i64 %"41", ptr addrspace(5) %"36", align 8
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"37", align 8
%"44" = load i64, ptr addrspace(5) %"35", align 8
%"52" = inttoptr i64 %"44" to ptr addrspace(1)
%"43" = load i64, ptr addrspace(1) %"52", align 4
store i64 %"43", ptr addrspace(5) %"38", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(1) %"52", align 8
store i64 %"43", ptr addrspace(5) %"38", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 8
%"46" = load i64, ptr addrspace(5) %"38", align 8
%"53" = inttoptr i64 %"45" to ptr addrspace(3)
store i64 %"46", ptr addrspace(3) %"53", align 4
%"48" = load i64, ptr addrspace(5) %"37", align 4
store i64 %"46", ptr addrspace(3) %"53", align 8
%"48" = load i64, ptr addrspace(5) %"37", align 8
%"54" = inttoptr i64 %"48" to ptr addrspace(3)
%"47" = load i64, ptr addrspace(3) %"54", align 4
store i64 %"47", ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"36", align 4
%"50" = load i64, ptr addrspace(5) %"39", align 4
%"47" = load i64, ptr addrspace(3) %"54", align 8
store i64 %"47", ptr addrspace(5) %"39", align 8
%"49" = load i64, ptr addrspace(5) %"36", align 8
%"50" = load i64, ptr addrspace(5) %"39", align 8
%"55" = inttoptr i64 %"49" to ptr addrspace(1)
store i64 %"50", ptr addrspace(1) %"55", align 4
store i64 %"50", ptr addrspace(1) %"55", align 8
ret void
}

Some files were not shown because too many files have changed in this diff Show More