mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-05-02 06:18:55 +03:00
Bunch of tiny fixes and improvements
This commit is contained in:
@ -1020,12 +1020,8 @@ fn emit_function_header<'a>(
|
|||||||
kernel_info: &mut HashMap<String, KernelInfo>,
|
kernel_info: &mut HashMap<String, KernelInfo>,
|
||||||
) -> Result<spirv::Word, TranslateError> {
|
) -> Result<spirv::Word, TranslateError> {
|
||||||
if let ast::MethodName::Kernel(name) = func_decl.name {
|
if let ast::MethodName::Kernel(name) = func_decl.name {
|
||||||
let input_args = if func_decl.shared_mem.is_none() {
|
let args_lens = func_decl
|
||||||
func_decl.input_arguments.as_slice()
|
.input_arguments
|
||||||
} else {
|
|
||||||
&func_decl.input_arguments[0..func_decl.input_arguments.len() - 1]
|
|
||||||
};
|
|
||||||
let args_lens = input_args
|
|
||||||
.iter()
|
.iter()
|
||||||
.map(|param| param.v_type.size_of())
|
.map(|param| param.v_type.size_of())
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -3449,7 +3449,7 @@ pub extern "system" fn cuStreamQuery(hStream: CUstream) -> CUresult {
|
|||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
pub extern "system" fn cuStreamSynchronize(hStream: CUstream) -> CUresult {
|
pub extern "system" fn cuStreamSynchronize(hStream: CUstream) -> CUresult {
|
||||||
r#impl::unimplemented()
|
CUresult::CUDA_SUCCESS
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(not(test), no_mangle)]
|
#[cfg_attr(not(test), no_mangle)]
|
||||||
|
@ -113,7 +113,7 @@ pub fn launch_kernel(
|
|||||||
func.arg_size.iter().fold(0, |offset, size_of_arg| {
|
func.arg_size.iter().fold(0, |offset, size_of_arg| {
|
||||||
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
|
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
|
||||||
});
|
});
|
||||||
if buffer_size != sum_of_kernel_argument_sizes {
|
if buffer_size < sum_of_kernel_argument_sizes {
|
||||||
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||||
}
|
}
|
||||||
let mut offset = 0;
|
let mut offset = 0;
|
||||||
|
@ -4399,3 +4399,20 @@ extern_redirect_with! {
|
|||||||
extern_redirect! {
|
extern_redirect! {
|
||||||
pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult;
|
pub fn cuFuncGetModule(hmod: *mut CUmodule, hfunc: CUfunction) -> CUresult;
|
||||||
}
|
}
|
||||||
|
#[repr(transparent)]
|
||||||
|
#[derive(Copy, Clone, Hash, PartialEq, Eq)]
|
||||||
|
pub struct CUoutput_mode_enum(pub ::std::os::raw::c_uint);
|
||||||
|
pub use self::CUoutput_mode_enum as CUoutput_mode;
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuProfilerInitialize(
|
||||||
|
configFile: *const ::std::os::raw::c_char,
|
||||||
|
outputFile: *const ::std::os::raw::c_char,
|
||||||
|
outputMode: CUoutput_mode,
|
||||||
|
) -> CUresult;
|
||||||
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuProfilerStart() -> CUresult;
|
||||||
|
}
|
||||||
|
extern_redirect! {
|
||||||
|
pub fn cuProfilerStop() -> CUresult;
|
||||||
|
}
|
||||||
|
@ -576,7 +576,7 @@ fn dump_arguments(
|
|||||||
let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
|
let sum_of_kernel_argument_sizes = args.iter().fold(0, |offset, size_of_arg| {
|
||||||
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
|
size_of_arg + round_up_to_multiple(offset, *size_of_arg)
|
||||||
});
|
});
|
||||||
if buffer_size != sum_of_kernel_argument_sizes {
|
if buffer_size < sum_of_kernel_argument_sizes {
|
||||||
return Err("Malformed `extra` parameter to kernel launch")?;
|
return Err("Malformed `extra` parameter to kernel launch")?;
|
||||||
}
|
}
|
||||||
let mut offset = 0;
|
let mut offset = 0;
|
||||||
|
@ -51,9 +51,10 @@ def parse_arguments(dump_path, prefix):
|
|||||||
return [load_arguments(path.join(dir, f)) for f in sorted(arg_files)]
|
return [load_arguments(path.join(dir, f)) for f in sorted(arg_files)]
|
||||||
|
|
||||||
|
|
||||||
def append_debug_buffer(args):
|
def append_debug_buffer(args, grid, block):
|
||||||
args = list(args)
|
args = list(args)
|
||||||
debug_buff = np.zeros(1024 * 1024, np.single)
|
items = block[0] * block[1] * block[2] * block[0] * block[1] * block[2]
|
||||||
|
debug_buff = np.zeros(items, dtype=np.uint32)
|
||||||
args.append((drv.InOut(debug_buff), debug_buff))
|
args.append((drv.InOut(debug_buff), debug_buff))
|
||||||
return args
|
return args
|
||||||
|
|
||||||
@ -71,7 +72,7 @@ def verify_single_dump(input_path, max_block_threads):
|
|||||||
return
|
return
|
||||||
module = drv.module_from_file(path.join(input_path, "module.ptx"))
|
module = drv.module_from_file(path.join(input_path, "module.ptx"))
|
||||||
kernel = module.get_function(kernel_name)
|
kernel = module.get_function(kernel_name)
|
||||||
pre_args = append_debug_buffer(parse_arguments(input_path, "pre"))
|
pre_args = append_debug_buffer(parse_arguments(input_path, "pre"), tuple(launch_lines[:3]), block)
|
||||||
kernel_pre_args, host_pre_args = zip(*pre_args)
|
kernel_pre_args, host_pre_args = zip(*pre_args)
|
||||||
kernel(*list(kernel_pre_args), grid=tuple(launch_lines[:3]), block=block, shared=launch_lines[6])
|
kernel(*list(kernel_pre_args), grid=tuple(launch_lines[:3]), block=block, shared=launch_lines[6])
|
||||||
post_args = parse_arguments(input_path, "post")
|
post_args = parse_arguments(input_path, "post")
|
||||||
@ -94,6 +95,7 @@ def main(argv):
|
|||||||
verify_single_dump(input_path, max_threads)
|
verify_single_dump(input_path, max_threads)
|
||||||
else:
|
else:
|
||||||
for input_subdir in sorted([path.join(input_path, dir_name) for dir_name in os.listdir(input_path)]):
|
for input_subdir in sorted([path.join(input_path, dir_name) for dir_name in os.listdir(input_path)]):
|
||||||
|
if os.path.isdir(input_subdir):
|
||||||
verify_single_dump(input_subdir, max_threads)
|
verify_single_dump(input_subdir, max_threads)
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user