Make everything async

This commit is contained in:
Andrzej Janik
2021-07-04 15:40:08 +02:00
parent d76ffd691c
commit ecc33f7b10
6 changed files with 39 additions and 32 deletions

Binary file not shown.

Binary file not shown.

View File

@ -781,6 +781,26 @@ impl<'a> CommandList<'a> {
Ok(unsafe { Self::from_ffi(result) }) Ok(unsafe { Self::from_ffi(result) })
} }
/// Creates an immediate command list for `dev` inside `ctx` by calling
/// `zeCommandListCreateImmediate`.
///
/// The backing queue is described with ordinal 0, index 0, no flags,
/// the default queue mode and normal priority.
///
/// # Errors
///
/// NOTE(review): `check!` is defined outside this view; it presumably
/// returns early with the Level Zero result code when the call does not
/// succeed -- confirm against the macro definition.
pub fn new_immediate(ctx: &'a Context, dev: Device) -> Result<Self> {
    // Out-parameter that the driver call fills in with the new handle.
    let mut handle: sys::ze_command_list_handle_t = ptr::null_mut();
    let desc = sys::ze_command_queue_desc_t {
        priority: sys::ze_command_queue_priority_t::ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
        mode: sys::ze_command_queue_mode_t::ZE_COMMAND_QUEUE_MODE_DEFAULT,
        flags: sys::ze_command_queue_flags_t(0),
        index: 0,
        ordinal: 0,
        pNext: ptr::null(),
        stype: sys::ze_structure_type_t::ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
    };
    check!(sys::zeCommandListCreateImmediate(
        ctx.as_ffi(),
        dev.as_ffi(),
        &desc,
        &mut handle
    ));
    // SAFETY: `handle` was written by `zeCommandListCreateImmediate` just above.
    // NOTE(review): assumes `check!` bails out on failure so the handle is
    // valid here -- confirm.
    let list = unsafe { Self::from_ffi(handle) };
    Ok(list)
}
pub unsafe fn append_memory_copy< pub unsafe fn append_memory_copy<
'dep, 'dep,
T: 'a + 'dep + Copy + Sized, T: 'a + 'dep + Copy + Sized,

View File

@ -81,7 +81,7 @@ pub fn launch_kernel(
{ {
return Err(CUresult::CUDA_ERROR_INVALID_VALUE); return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
} }
GlobalState::lock_stream(hstream, |stream| { GlobalState::lock_enqueue(hstream, |cmd_list, signal, wait| {
let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?; let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?;
if kernel_params != ptr::null_mut() { if kernel_params != ptr::null_mut() {
for (i, arg_size) in func.arg_size.iter().enumerate() { for (i, arg_size) in func.arg_size.iter().enumerate() {
@ -144,19 +144,16 @@ pub fn launch_kernel(
func.base func.base
.set_group_size(block_dim_x, block_dim_y, block_dim_z)?; .set_group_size(block_dim_x, block_dim_y, block_dim_z)?;
func.legacy_args.reset(); func.legacy_args.reset();
let cmd_list = stream.command_list()?;
unsafe { unsafe {
cmd_list.append_launch_kernel( cmd_list.append_launch_kernel(
&mut func.base, &mut func.base,
&[grid_dim_x, grid_dim_y, grid_dim_z], &[grid_dim_x, grid_dim_y, grid_dim_z],
None, Some(signal),
&mut [], wait,
)?; )?;
} }
cmd_list.close()?; Ok::<_, CUresult>(())
stream.queue.execute_and_synchronize(cmd_list)?; })
Ok(())
})?
} }
fn round_up_to_multiple(x: usize, multiple: usize) -> usize { fn round_up_to_multiple(x: usize, multiple: usize) -> usize {

View File

@ -13,7 +13,7 @@ pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult>
pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> { pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<(), CUresult> {
GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| { GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
unsafe { cmd_list.append_memory_copy_raw(dst, src, bytesize, Some(signal), wait)? }; unsafe { cmd_list.append_memory_copy_raw(dst, src, bytesize, Some(signal), wait)? };
Ok::<_, l0::sys::ze_result_t>(()) Ok(())
}) })
} }
@ -26,41 +26,35 @@ pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
} }
pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> { pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(), CUresult> {
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream| { GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
let cmd_list = stream.command_list()?;
unsafe { unsafe {
cmd_list.append_memory_fill_raw( cmd_list.append_memory_fill_raw(
dst, dst,
&mut ui as *mut _ as *mut _, &mut ui as *mut _ as *mut _,
mem::size_of::<u32>(), mem::size_of::<u32>(),
mem::size_of::<u32>() * n, mem::size_of::<u32>() * n,
None, Some(signal),
&mut [], wait,
) )
}?; }?;
cmd_list.close()?; Ok(())
stream.queue.execute_and_synchronize(cmd_list)?; })
Ok::<_, CUresult>(())
})?
} }
pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> { pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CUresult> {
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream| { GlobalState::lock_enqueue(stream::CU_STREAM_LEGACY, |cmd_list, signal, wait| {
let cmd_list = stream.command_list()?;
unsafe { unsafe {
cmd_list.append_memory_fill_raw( cmd_list.append_memory_fill_raw(
dst, dst,
&mut uc as *mut _ as *mut _, &mut uc as *mut _ as *mut _,
mem::size_of::<u8>(), mem::size_of::<u8>(),
mem::size_of::<u8>() * n, mem::size_of::<u8>() * n,
None, Some(signal),
&mut [], wait,
) )
}?; }?;
cmd_list.close()?; Ok(())
stream.queue.execute_and_synchronize(cmd_list)?; })
Ok::<_, CUresult>(())
})?
} }
#[cfg(test)] #[cfg(test)]

View File

@ -275,17 +275,13 @@ impl GlobalState {
fn lock_enqueue( fn lock_enqueue(
stream: *mut stream::Stream, stream: *mut stream::Stream,
f: impl FnOnce( f: impl FnOnce(&l0::CommandList, &l0::Event<'static>, &[&l0::Event<'static>]) -> Result<(), CUresult>,
&mut l0::CommandList,
&l0::Event<'static>,
&[&l0::Event<'static>],
) -> l0::Result<()>,
) -> Result<(), CUresult> { ) -> Result<(), CUresult> {
Self::lock_stream(stream, |stream_data| { Self::lock_stream(stream, |stream_data| {
let l0_dev = unsafe { (*(*stream_data.context).device).base }; let l0_dev = unsafe { (*(*stream_data.context).device).base };
let l0_ctx = unsafe { &mut (*(*stream_data.context).device).l0_context }; let l0_ctx = unsafe { &mut (*(*stream_data.context).device).l0_context };
let event_pool = unsafe { &mut (*(*stream_data.context).device).event_pool }; let event_pool = unsafe { &mut (*(*stream_data.context).device).event_pool };
let mut cmd_list = unsafe { mem::transmute(stream_data.command_list()?) }; let cmd_list = unsafe { mem::transmute(stream_data.command_list()?) };
stream_data stream_data
.process_finished_events(&mut |(_, marker)| event_pool.mark_as_free(marker))?; .process_finished_events(&mut |(_, marker)| event_pool.mark_as_free(marker))?;
let prev_event = stream_data.get_last_event(); let prev_event = stream_data.get_last_event();
@ -293,7 +289,7 @@ impl GlobalState {
let empty = []; let empty = [];
let prev_event_slice = prev_event_array.as_ref().map_or(&empty[..], |arr| &arr[..]); let prev_event_slice = prev_event_array.as_ref().map_or(&empty[..], |arr| &arr[..]);
let (new_event, new_marker) = event_pool.get(l0_dev, l0_ctx)?; let (new_event, new_marker) = event_pool.get(l0_dev, l0_ctx)?;
f(&mut cmd_list, &new_event, prev_event_slice)?; f(&cmd_list, &new_event, prev_event_slice)?;
cmd_list.close()?; cmd_list.close()?;
unsafe { stream_data.queue.execute(&cmd_list, None)? }; unsafe { stream_data.queue.execute(&cmd_list, None)? };
stream_data.push_event((new_event, new_marker)); stream_data.push_event((new_event, new_marker));