diff --git a/notcuda/src/cu.rs b/notcuda/src/cu.rs index 311552c..2cf152e 100644 --- a/notcuda/src/cu.rs +++ b/notcuda/src/cu.rs @@ -81,6 +81,117 @@ pub enum Result { ERROR_UNKNOWN = 999, } +#[repr(C)] +#[allow(non_camel_case_types)] +pub enum DeviceAttribute { + MAX_THREADS_PER_BLOCK = 1, + MAX_BLOCK_DIM_X = 2, + MAX_BLOCK_DIM_Y = 3, + MAX_BLOCK_DIM_Z = 4, + MAX_GRID_DIM_X = 5, + MAX_GRID_DIM_Y = 6, + MAX_GRID_DIM_Z = 7, + MAX_SHARED_MEMORY_PER_BLOCK = 8, + TOTAL_CONSTANT_MEMORY = 9, + WARP_SIZE = 10, + MAX_PITCH = 11, + MAX_REGISTERS_PER_BLOCK = 12, + CLOCK_RATE = 13, + TEXTURE_ALIGNMENT = 14, + GPU_OVERLAP = 15, + MULTIPROCESSOR_COUNT = 16, + KERNEL_EXEC_TIMEOUT = 17, + INTEGRATED = 18, + CAN_MAP_HOST_MEMORY = 19, + COMPUTE_MODE = 20, + MAXIMUM_TEXTURE1D_WIDTH = 21, + MAXIMUM_TEXTURE2D_WIDTH = 22, + MAXIMUM_TEXTURE2D_HEIGHT = 23, + MAXIMUM_TEXTURE3D_WIDTH = 24, + MAXIMUM_TEXTURE3D_HEIGHT = 25, + MAXIMUM_TEXTURE3D_DEPTH = 26, + MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27, + MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28, + MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29, + SURFACE_ALIGNMENT = 30, + CONCURRENT_KERNELS = 31, + ECC_ENABLED = 32, + PCI_BUS_ID = 33, + PCI_DEVICE_ID = 34, + TCC_DRIVER = 35, + MEMORY_CLOCK_RATE = 36, + GLOBAL_MEMORY_BUS_WIDTH = 37, + L2_CACHE_SIZE = 38, + MAX_THREADS_PER_MULTIPROCESSOR = 39, + ASYNC_ENGINE_COUNT = 40, + UNIFIED_ADDRESSING = 41, + MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, + MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43, + CAN_TEX2D_GATHER = 44, + MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45, + MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46, + MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47, + MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48, + MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49, + PCI_DOMAIN_ID = 50, + TEXTURE_PITCH_ALIGNMENT = 51, + MAXIMUM_TEXTURECUBEMAP_WIDTH = 52, + MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53, + MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54, + MAXIMUM_SURFACE1D_WIDTH = 55, + MAXIMUM_SURFACE2D_WIDTH = 56, + MAXIMUM_SURFACE2D_HEIGHT = 57, + MAXIMUM_SURFACE3D_WIDTH = 58, + MAXIMUM_SURFACE3D_HEIGHT = 59, + MAXIMUM_SURFACE3D_DEPTH = 60, + MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61, + MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62, + MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63, + MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64, + MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65, + MAXIMUM_SURFACECUBEMAP_WIDTH = 66, + MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67, + MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68, + MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69, + MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70, + MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71, + MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72, + MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73, + MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74, + COMPUTE_CAPABILITY_MAJOR = 75, + COMPUTE_CAPABILITY_MINOR = 76, + MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77, + STREAM_PRIORITIES_SUPPORTED = 78, + GLOBAL_L1_CACHE_SUPPORTED = 79, + LOCAL_L1_CACHE_SUPPORTED = 80, + MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81, + MAX_REGISTERS_PER_MULTIPROCESSOR = 82, + MANAGED_MEMORY = 83, + MULTI_GPU_BOARD = 84, + MULTI_GPU_BOARD_GROUP_ID = 85, + HOST_NATIVE_ATOMIC_SUPPORTED = 86, + SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87, + PAGEABLE_MEMORY_ACCESS = 88, + CONCURRENT_MANAGED_ACCESS = 89, + COMPUTE_PREEMPTION_SUPPORTED = 90, + CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91, + CAN_USE_STREAM_MEM_OPS = 92, + CAN_USE_64_BIT_STREAM_MEM_OPS = 93, + CAN_USE_STREAM_WAIT_VALUE_NOR = 94, + COOPERATIVE_LAUNCH = 95, + COOPERATIVE_MULTI_DEVICE_LAUNCH = 96, + MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97, + CAN_FLUSH_REMOTE_WRITES = 98, + HOST_REGISTER_SUPPORTED = 99, + PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100, + DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101, + VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102, + HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103, + HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104, + HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105, + MAX = 106, +} + impl Result { pub fn from_l0(result: l0::ze_result_t) -> Result { match result { diff --git a/notcuda/src/lib.rs b/notcuda/src/lib.rs index bb17f8b..6d9b884 100644 --- a/notcuda/src/lib.rs +++ b/notcuda/src/lib.rs @@ -4,15 +4,12 @@ extern crate lazy_static; use std::sync::Mutex; use std::ptr; -use std::cmp; use std::os::raw::{c_char, c_int, c_uint}; mod cu; mod export_table; mod ze; -use ze::Versioned; - macro_rules! l0_check_err { ($exp:expr) => { { @@ -24,24 +21,13 @@ macro_rules! l0_check_err { }; } -macro_rules! l0_check { - ($exp:expr) => { - { - let result = unsafe{ $exp }; - if result != l0::ze_result_t::ZE_RESULT_SUCCESS { - return result; - } - } - }; -} - lazy_static! { pub static ref GLOBAL_STATE: Mutex> = Mutex::new(None); } pub struct Driver { base: l0::ze_driver_handle_t, - devices: Vec:: + devices: Vec:: } unsafe impl Send for Driver {} unsafe impl Sync for Driver {} @@ -58,7 +44,7 @@ impl Driver { if (count as usize) < devices.len() { devices.truncate(count as usize); } - Ok(Driver{ base: handle, devices: devices }) + Ok(Driver{ base: handle, devices: ze::Device::new_vec(devices) }) } fn call l0::ze_result_t>(f: F) -> cu::Result { @@ -75,6 +61,19 @@ impl Driver { } } + fn call_device l0::ze_result_t>(cu::Device(dev): cu::Device, f: F) -> cu::Result { + if dev < 0 { + return cu::Result::ERROR_INVALID_VALUE; + } + let dev = dev as usize; + Driver::call(|driver| { + if dev >= driver.devices.len() { + return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + f(&mut driver.devices[dev]) + }) + } + fn device_get_count(&self, count: *mut i32) -> l0::ze_result_t { unsafe { *count = self.devices.len() as i32 }; l0::ze_result_t::ZE_RESULT_SUCCESS @@ -87,40 +86,6 @@ impl Driver { unsafe { *device = cu::Device(ordinal) }; l0::ze_result_t::ZE_RESULT_SUCCESS } - - fn device_get_name(&self, name: *mut c_char, len: c_int, cu::Device(dev): cu::Device) -> l0::ze_result_t { - if (dev as usize) >= self.devices.len() { - return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT; - } - let mut props = Box::new(l0::ze_device_properties_t::new()); - l0_check! { l0::zeDeviceGetProperties(self.devices[dev as usize], props.as_mut()) }; - let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0); - let dst_null_pos = cmp::min((len - 1) as usize, null_pos); - unsafe { *(name.add(dst_null_pos)) = 0 }; - unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) }; - l0::ze_result_t::ZE_RESULT_SUCCESS - } - - fn device_total_mem(&self, bytes: *mut usize, cu::Device(dev): cu::Device) -> l0::ze_result_t { - if (dev as usize) >= self.devices.len() { - return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT; - } - let dev = dev as usize; - let mut count = 0; - l0_check! { l0::zeDeviceGetMemoryProperties(self.devices[dev], &mut count, ptr::null_mut()) }; - if count == 0 { - return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN; - } - let mut props = vec![l0::ze_device_memory_properties_t::new(); count as usize]; - l0_check! { l0::zeDeviceGetMemoryProperties(self.devices[dev], &mut count, props.as_mut_ptr()) }; - let iter_count = cmp::min(count as usize, props.len()); - if iter_count == 0 { - return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN; - } - let max_mem = props.iter().take(iter_count).map(|p| p.totalSize).max().unwrap(); - unsafe { *bytes = max_mem as usize }; - l0::ze_result_t::ZE_RESULT_SUCCESS - } } #[no_mangle] @@ -169,19 +134,28 @@ pub extern "C" fn cuDeviceGet(device: *mut cu::Device, ordinal: c_int) -> cu::Re } #[no_mangle] -pub extern "C" fn cuDeviceGetName(name: *mut c_char, len: c_int, dev: cu::Device) -> cu::Result { - let cu::Device(dev_idx) = dev; - if len <= 0 || dev_idx < 0 || name == ptr::null_mut() { +pub extern "C" fn cuDeviceGetName(name: *mut c_char, len: c_int, dev_idx: cu::Device) -> cu::Result { + if name == ptr::null_mut() || len <= 0 { return cu::Result::ERROR_INVALID_VALUE; } - Driver::call(|driver| driver.device_get_name(name, len, dev)) + Driver::call_device(dev_idx, |dev| dev.get_name(name, len)) } #[no_mangle] -pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: cu::Device) -> cu::Result { - let cu::Device(dev_idx) = dev; - if dev_idx < 0 || bytes == ptr::null_mut() { +pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) -> cu::Result { + if bytes == ptr::null_mut() { return cu::Result::ERROR_INVALID_VALUE; } - Driver::call(|driver| driver.device_total_mem(bytes, dev)) -} \ No newline at end of file + Driver::call_device(dev_idx, |dev| dev.total_mem(bytes)) +} + +/* +#[no_mangle] +pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: cu::DeviceAttribute, dev: cu::Device) -> cu::Result { + let cu::Device(dev_idx) = dev; + if pi == ptr::null_mut() || dev_idx < 0 { + return cu::Result::ERROR_INVALID_VALUE; + } + Driver::call(|driver| driver.device_get_attribute(bytes, dev)) +} +*/ \ No newline at end of file diff --git a/notcuda/src/ze.rs b/notcuda/src/ze.rs index 6d798b1..1da9a3e 100644 --- a/notcuda/src/ze.rs +++ b/notcuda/src/ze.rs @@ -1,5 +1,29 @@ use level_zero_sys::*; +use std::cmp; +use std::mem; +use std::os::raw::{c_char, c_int}; +use std::ptr; + +macro_rules! assert_size_eq { + ($x:ty, $($xs:ty),+ $(,)?) => { + const _: fn() = || { + $(let _ = ::std::mem::transmute::<$x, $xs>;)+ + }; + }; +} + +macro_rules! l0_check { + ($exp:expr) => { + { + let result = unsafe{ $exp }; + if result != l0::ze_result_t::ZE_RESULT_SUCCESS { + return result; + } + } + }; +} + pub trait Versioned : Sized { type Version; @@ -35,4 +59,40 @@ impl Versioned for ze_device_properties_t { } } +#[derive(Clone, Copy)] +#[repr(transparent)] // required so a Vec can be safely transmutted to Vec +pub struct Device(pub ze_device_handle_t); +impl Device { + pub fn new_vec(v: Vec) -> Vec { + assert_size_eq!(Device, ze_device_handle_t); + unsafe { mem::transmute(v) } + } + + pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t { + let mut props = Box::new(l0::ze_device_properties_t::new()); + l0_check! { l0::zeDeviceGetProperties(self.0, props.as_mut()) }; + let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0); + let dst_null_pos = cmp::min((len - 1) as usize, null_pos); + unsafe { *(name.add(dst_null_pos)) = 0 }; + unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) }; + l0::ze_result_t::ZE_RESULT_SUCCESS + } + + pub fn total_mem(self, bytes: *mut usize) -> l0::ze_result_t { + let mut count = 0; + l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) }; + if count == 0 { + return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN; + } + let mut props = vec![l0::ze_device_memory_properties_t::new(); count as usize]; + l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) }; + let iter_count = cmp::min(count as usize, props.len()); + if iter_count == 0 { + return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN; + } + let max_mem = props.iter().take(iter_count).map(|p| p.totalSize).max().unwrap(); + unsafe { *bytes = max_mem as usize }; + l0::ze_result_t::ZE_RESULT_SUCCESS + } +} \ No newline at end of file