mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-07-18 09:46:21 +03:00
Refactor device functions
This commit is contained in:
@ -81,6 +81,117 @@ pub enum Result {
|
||||
ERROR_UNKNOWN = 999,
|
||||
}
|
||||
|
||||
/// Selector for a single queryable device property.
///
/// Discriminants are explicit and contiguous from 1 to 106; `MAX` is the
/// last value and acts as an end marker rather than a real attribute.
///
/// NOTE(review): the names and numbering presumably mirror the CUDA driver
/// API's `CUdevice_attribute` enumeration — confirm against `cuda.h` before
/// adding or renumbering variants.
#[repr(C)]
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DeviceAttribute {
    MAX_THREADS_PER_BLOCK = 1,
    MAX_BLOCK_DIM_X = 2,
    MAX_BLOCK_DIM_Y = 3,
    MAX_BLOCK_DIM_Z = 4,
    MAX_GRID_DIM_X = 5,
    MAX_GRID_DIM_Y = 6,
    MAX_GRID_DIM_Z = 7,
    MAX_SHARED_MEMORY_PER_BLOCK = 8,
    TOTAL_CONSTANT_MEMORY = 9,
    WARP_SIZE = 10,
    MAX_PITCH = 11,
    MAX_REGISTERS_PER_BLOCK = 12,
    CLOCK_RATE = 13,
    TEXTURE_ALIGNMENT = 14,
    GPU_OVERLAP = 15,
    MULTIPROCESSOR_COUNT = 16,
    KERNEL_EXEC_TIMEOUT = 17,
    INTEGRATED = 18,
    CAN_MAP_HOST_MEMORY = 19,
    COMPUTE_MODE = 20,
    MAXIMUM_TEXTURE1D_WIDTH = 21,
    MAXIMUM_TEXTURE2D_WIDTH = 22,
    MAXIMUM_TEXTURE2D_HEIGHT = 23,
    MAXIMUM_TEXTURE3D_WIDTH = 24,
    MAXIMUM_TEXTURE3D_HEIGHT = 25,
    MAXIMUM_TEXTURE3D_DEPTH = 26,
    MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
    MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
    MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
    SURFACE_ALIGNMENT = 30,
    CONCURRENT_KERNELS = 31,
    ECC_ENABLED = 32,
    PCI_BUS_ID = 33,
    PCI_DEVICE_ID = 34,
    TCC_DRIVER = 35,
    MEMORY_CLOCK_RATE = 36,
    GLOBAL_MEMORY_BUS_WIDTH = 37,
    L2_CACHE_SIZE = 38,
    MAX_THREADS_PER_MULTIPROCESSOR = 39,
    ASYNC_ENGINE_COUNT = 40,
    UNIFIED_ADDRESSING = 41,
    MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
    MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
    CAN_TEX2D_GATHER = 44,
    MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
    MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
    MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
    MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
    MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
    PCI_DOMAIN_ID = 50,
    TEXTURE_PITCH_ALIGNMENT = 51,
    MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
    MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
    MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
    MAXIMUM_SURFACE1D_WIDTH = 55,
    MAXIMUM_SURFACE2D_WIDTH = 56,
    MAXIMUM_SURFACE2D_HEIGHT = 57,
    MAXIMUM_SURFACE3D_WIDTH = 58,
    MAXIMUM_SURFACE3D_HEIGHT = 59,
    MAXIMUM_SURFACE3D_DEPTH = 60,
    MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
    MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
    MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
    MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
    MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
    MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
    MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
    MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
    MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
    MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
    MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
    MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
    MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
    MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
    COMPUTE_CAPABILITY_MAJOR = 75,
    COMPUTE_CAPABILITY_MINOR = 76,
    MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
    STREAM_PRIORITIES_SUPPORTED = 78,
    GLOBAL_L1_CACHE_SUPPORTED = 79,
    LOCAL_L1_CACHE_SUPPORTED = 80,
    MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
    MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
    MANAGED_MEMORY = 83,
    MULTI_GPU_BOARD = 84,
    MULTI_GPU_BOARD_GROUP_ID = 85,
    HOST_NATIVE_ATOMIC_SUPPORTED = 86,
    SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
    PAGEABLE_MEMORY_ACCESS = 88,
    CONCURRENT_MANAGED_ACCESS = 89,
    COMPUTE_PREEMPTION_SUPPORTED = 90,
    CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
    CAN_USE_STREAM_MEM_OPS = 92,
    CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
    CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
    COOPERATIVE_LAUNCH = 95,
    COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
    MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
    CAN_FLUSH_REMOTE_WRITES = 98,
    HOST_REGISTER_SUPPORTED = 99,
    PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
    DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
    VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
    HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
    HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
    HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
    MAX = 106,
}
|
||||
|
||||
impl Result {
|
||||
pub fn from_l0(result: l0::ze_result_t) -> Result {
|
||||
match result {
|
||||
|
@ -4,15 +4,12 @@ extern crate lazy_static;
|
||||
|
||||
use std::sync::Mutex;
|
||||
use std::ptr;
|
||||
use std::cmp;
|
||||
use std::os::raw::{c_char, c_int, c_uint};
|
||||
|
||||
mod cu;
|
||||
mod export_table;
|
||||
mod ze;
|
||||
|
||||
use ze::Versioned;
|
||||
|
||||
macro_rules! l0_check_err {
|
||||
($exp:expr) => {
|
||||
{
|
||||
@ -24,24 +21,13 @@ macro_rules! l0_check_err {
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! l0_check {
|
||||
($exp:expr) => {
|
||||
{
|
||||
let result = unsafe{ $exp };
|
||||
if result != l0::ze_result_t::ZE_RESULT_SUCCESS {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Process-wide driver slot guarded by a mutex. It starts out as `None`;
// NOTE(review): presumably filled in by the initialisation entry point,
// which is outside this view — confirm.
lazy_static! {
    pub static ref GLOBAL_STATE: Mutex<Option<Driver>> = Mutex::new(None);
}
|
||||
|
||||
pub struct Driver {
|
||||
base: l0::ze_driver_handle_t,
|
||||
devices: Vec::<l0::ze_device_handle_t>
|
||||
devices: Vec::<ze::Device>
|
||||
}
|
||||
unsafe impl Send for Driver {}
|
||||
unsafe impl Sync for Driver {}
|
||||
@ -58,7 +44,7 @@ impl Driver {
|
||||
if (count as usize) < devices.len() {
|
||||
devices.truncate(count as usize);
|
||||
}
|
||||
Ok(Driver{ base: handle, devices: devices })
|
||||
Ok(Driver{ base: handle, devices: ze::Device::new_vec(devices) })
|
||||
}
|
||||
|
||||
fn call<F: FnOnce(&mut Driver) -> l0::ze_result_t>(f: F) -> cu::Result {
|
||||
@ -75,6 +61,19 @@ impl Driver {
|
||||
}
|
||||
}
|
||||
|
||||
fn call_device<F: FnOnce(&mut ze::Device) -> l0::ze_result_t>(cu::Device(dev): cu::Device, f: F) -> cu::Result {
|
||||
if dev < 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
let dev = dev as usize;
|
||||
Driver::call(|driver| {
|
||||
if dev >= driver.devices.len() {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
f(&mut driver.devices[dev])
|
||||
})
|
||||
}
|
||||
|
||||
fn device_get_count(&self, count: *mut i32) -> l0::ze_result_t {
|
||||
unsafe { *count = self.devices.len() as i32 };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
@ -87,40 +86,6 @@ impl Driver {
|
||||
unsafe { *device = cu::Device(ordinal) };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
|
||||
fn device_get_name(&self, name: *mut c_char, len: c_int, cu::Device(dev): cu::Device) -> l0::ze_result_t {
|
||||
if (dev as usize) >= self.devices.len() {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
let mut props = Box::new(l0::ze_device_properties_t::new());
|
||||
l0_check! { l0::zeDeviceGetProperties(self.devices[dev as usize], props.as_mut()) };
|
||||
let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0);
|
||||
let dst_null_pos = cmp::min((len - 1) as usize, null_pos);
|
||||
unsafe { *(name.add(dst_null_pos)) = 0 };
|
||||
unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
|
||||
fn device_total_mem(&self, bytes: *mut usize, cu::Device(dev): cu::Device) -> l0::ze_result_t {
|
||||
if (dev as usize) >= self.devices.len() {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
let dev = dev as usize;
|
||||
let mut count = 0;
|
||||
l0_check! { l0::zeDeviceGetMemoryProperties(self.devices[dev], &mut count, ptr::null_mut()) };
|
||||
if count == 0 {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
let mut props = vec![l0::ze_device_memory_properties_t::new(); count as usize];
|
||||
l0_check! { l0::zeDeviceGetMemoryProperties(self.devices[dev], &mut count, props.as_mut_ptr()) };
|
||||
let iter_count = cmp::min(count as usize, props.len());
|
||||
if iter_count == 0 {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
let max_mem = props.iter().take(iter_count).map(|p| p.totalSize).max().unwrap();
|
||||
unsafe { *bytes = max_mem as usize };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
@ -169,19 +134,28 @@ pub extern "C" fn cuDeviceGet(device: *mut cu::Device, ordinal: c_int) -> cu::Re
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuDeviceGetName(name: *mut c_char, len: c_int, dev: cu::Device) -> cu::Result {
|
||||
let cu::Device(dev_idx) = dev;
|
||||
if len <= 0 || dev_idx < 0 || name == ptr::null_mut() {
|
||||
pub extern "C" fn cuDeviceGetName(name: *mut c_char, len: c_int, dev_idx: cu::Device) -> cu::Result {
|
||||
if name == ptr::null_mut() || len <= 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call(|driver| driver.device_get_name(name, len, dev))
|
||||
Driver::call_device(dev_idx, |dev| dev.get_name(name, len))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev: cu::Device) -> cu::Result {
|
||||
let cu::Device(dev_idx) = dev;
|
||||
if dev_idx < 0 || bytes == ptr::null_mut() {
|
||||
pub extern "C" fn cuDeviceTotalMem_v2(bytes: *mut usize, dev_idx: cu::Device) -> cu::Result {
|
||||
if bytes == ptr::null_mut() {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call(|driver| driver.device_total_mem(bytes, dev))
|
||||
}
|
||||
Driver::call_device(dev_idx, |dev| dev.total_mem(bytes))
|
||||
}
|
||||
|
||||
/*
|
||||
#[no_mangle]
|
||||
pub extern "C" fn cuDeviceGetAttribute(pi: *mut c_int, attrib: cu::DeviceAttribute, dev: cu::Device) -> cu::Result {
|
||||
let cu::Device(dev_idx) = dev;
|
||||
if pi == ptr::null_mut() || dev_idx < 0 {
|
||||
return cu::Result::ERROR_INVALID_VALUE;
|
||||
}
|
||||
Driver::call(|driver| driver.device_get_attribute(bytes, dev))
|
||||
}
|
||||
*/
|
@ -1,5 +1,29 @@
|
||||
use level_zero_sys::*;
|
||||
|
||||
use std::cmp;
|
||||
use std::mem;
|
||||
use std::os::raw::{c_char, c_int};
|
||||
use std::ptr;
|
||||
|
||||
/// Compile-time check that the first type has the same size as every
/// following type.
///
/// The expansion names `transmute::<A, B>` inside an anonymous constant;
/// the compiler rejects that instantiation when the sizes differ, so a
/// mismatch is a build error and nothing is executed at run time.
macro_rules! assert_size_eq {
    ($x:ty, $($xs:ty),+ $(,)?) => {
        const _: fn() = || {
            $(let _ = ::std::mem::transmute::<$x, $xs>;)+
        };
    };
}
|
||||
|
||||
/// Evaluates a Level Zero call inside `unsafe` and returns early from the
/// *enclosing function* with the call's result unless it is
/// `ZE_RESULT_SUCCESS`.
///
/// Only usable in functions whose return type is `l0::ze_result_t`.
macro_rules! l0_check {
    ($exp:expr) => {{
        let result = unsafe { $exp };
        if result != l0::ze_result_t::ZE_RESULT_SUCCESS {
            return result;
        }
    }};
}
|
||||
|
||||
pub trait Versioned : Sized {
|
||||
type Version;
|
||||
|
||||
@ -35,4 +59,40 @@ impl Versioned for ze_device_properties_t {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[repr(transparent)] // required so a Vec<ze_device_handle_t> can be safely transmutted to Vec<Device>
|
||||
pub struct Device(pub ze_device_handle_t);
|
||||
|
||||
impl Device {
|
||||
pub fn new_vec(v: Vec<ze_device_handle_t>) -> Vec<Device> {
|
||||
assert_size_eq!(Device, ze_device_handle_t);
|
||||
unsafe { mem::transmute(v) }
|
||||
}
|
||||
|
||||
pub fn get_name(self, name: *mut c_char, len: c_int) -> l0::ze_result_t {
|
||||
let mut props = Box::new(l0::ze_device_properties_t::new());
|
||||
l0_check! { l0::zeDeviceGetProperties(self.0, props.as_mut()) };
|
||||
let null_pos = props.name.iter().position(|&c| c == 0).unwrap_or(0);
|
||||
let dst_null_pos = cmp::min((len - 1) as usize, null_pos);
|
||||
unsafe { *(name.add(dst_null_pos)) = 0 };
|
||||
unsafe { std::ptr::copy_nonoverlapping(props.name.as_ptr(), name, dst_null_pos) };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
|
||||
pub fn total_mem(self, bytes: *mut usize) -> l0::ze_result_t {
|
||||
let mut count = 0;
|
||||
l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, ptr::null_mut()) };
|
||||
if count == 0 {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
let mut props = vec![l0::ze_device_memory_properties_t::new(); count as usize];
|
||||
l0_check! { l0::zeDeviceGetMemoryProperties(self.0, &mut count, props.as_mut_ptr()) };
|
||||
let iter_count = cmp::min(count as usize, props.len());
|
||||
if iter_count == 0 {
|
||||
return l0::ze_result_t::ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
let max_mem = props.iter().take(iter_count).map(|p| p.totalSize).max().unwrap();
|
||||
unsafe { *bytes = max_mem as usize };
|
||||
l0::ze_result_t::ZE_RESULT_SUCCESS
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user