Create infrastructure for performance libraries (#363)

This commit is contained in:
Andrzej Janik
2025-05-01 22:37:18 +02:00
committed by GitHub
parent adc4673a20
commit cc83b9f1f6
53 changed files with 38361 additions and 1670 deletions

View File

@ -1,4 +1,4 @@
FROM nvidia/cuda:12.4.1-base-ubuntu22.04 FROM nvidia/cuda:12.8.1-base-ubuntu24.04
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
wget \ wget \
@ -18,10 +18,14 @@ RUN wget https://apt.llvm.org/llvm.sh && \
./llvm.sh ${LLVM_VERSION} ./llvm.sh ${LLVM_VERSION}
# Feel free to change to a newer version if you have a newer verison on your host # Feel free to change to a newer version if you have a newer verison on your host
ARG CUDA_PKG_VERSION=12-4 ARG CUDA_PKG_VERSION=12-8
# Docker <-> host driver version compatiblity is newer host <-> older docker # Docker <-> host driver version compatiblity is newer host <-> older docker
# We don't care about a specific driver version, so pick oldest 5XX # We don't care about a specific driver version, so pick oldest 5XX compatible
ARG CUDA_DRIVER=515 ARG CUDA_DRIVER=570
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \
dpkg -i libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \
rm libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# CUDA headers need it for interop # CUDA headers need it for interop
libgl-dev libegl-dev libvdpau-dev \ libgl-dev libegl-dev libvdpau-dev \
@ -30,13 +34,18 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter
cuda-nvml-dev-${CUDA_PKG_VERSION} \ cuda-nvml-dev-${CUDA_PKG_VERSION} \
cuda-cudart-${CUDA_PKG_VERSION} \ cuda-cudart-${CUDA_PKG_VERSION} \
cuda-profiler-api-${CUDA_PKG_VERSION} \ cuda-profiler-api-${CUDA_PKG_VERSION} \
cuda-nvcc-${CUDA_PKG_VERSION} cuda-nvcc-${CUDA_PKG_VERSION} \
libcudnn8-dev \
cudnn9-cuda-${CUDA_PKG_VERSION} \
libcufft-dev-${CUDA_PKG_VERSION} \
libcublas-dev-${CUDA_PKG_VERSION} \
libcusparse-dev-${CUDA_PKG_VERSION}
ARG ROCM_VERSION=6.3.1 ARG ROCM_VERSION=6.4
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \ RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \ wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \ gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} jammy main" > /etc/apt/sources.list.d/rocm.list && \ echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" > /etc/apt/sources.list.d/rocm.list && \
echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \ echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
rocminfo \ rocminfo \

View File

@ -7,7 +7,7 @@
}, },
"securityOpt": [ "seccomp=unconfined" ], "securityOpt": [ "seccomp=unconfined" ],
"runArgs": [ "runArgs": [
"--runtime=nvidia", //"--runtime=nvidia",
"--device=/dev/kfd", "--device=/dev/kfd",
"--device=/dev/dri", "--device=/dev/dri",
"--group-add=video" "--group-add=video"
@ -25,7 +25,7 @@
}, },
// https://aka.ms/dev-containers-non-root. // https://aka.ms/dev-containers-non-root.
"remoteUser": "root", "remoteUser": "root",
//"hostRequirements": { "gpu": "optional" } "hostRequirements": { "gpu": true },
"customizations": { "customizations": {
"vscode": { "vscode": {
"extensions": [ "mhutchie.git-graph" ] "extensions": [ "mhutchie.git-graph" ]

40
Cargo.lock generated
View File

@ -1343,6 +1343,30 @@ dependencies = [
"syn 2.0.89", "syn 2.0.89",
] ]
[[package]]
name = "zluda_blas"
version = "0.0.0"
dependencies = [
"cuda_base",
"cuda_types",
]
[[package]]
name = "zluda_blaslt"
version = "0.0.0"
dependencies = [
"cuda_base",
"cuda_types",
]
[[package]]
name = "zluda_dnn"
version = "0.0.0"
dependencies = [
"cuda_base",
"cuda_types",
]
[[package]] [[package]]
name = "zluda_dump" name = "zluda_dump"
version = "0.0.0" version = "0.0.0"
@ -1364,6 +1388,14 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "zluda_fft"
version = "0.0.0"
dependencies = [
"cuda_base",
"cuda_types",
]
[[package]] [[package]]
name = "zluda_inject" name = "zluda_inject"
version = "0.0.0" version = "0.0.0"
@ -1393,3 +1425,11 @@ dependencies = [
"wchar", "wchar",
"winapi", "winapi",
] ]
[[package]]
name = "zluda_sparse"
version = "0.0.0"
dependencies = [
"cuda_base",
"cuda_types",
]

View File

@ -20,6 +20,11 @@ members = [
"ptx_parser_macros_impl", "ptx_parser_macros_impl",
"xtask", "xtask",
"zluda_bindgen", "zluda_bindgen",
"zluda_dnn",
"zluda_blas",
"zluda_blaslt",
"zluda_fft",
"zluda_sparse",
] ]
default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect"] default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect"]

6861
cuda_base/src/cublas.rs Normal file

File diff suppressed because it is too large Load Diff

581
cuda_base/src/cublaslt.rs Normal file
View File

@ -0,0 +1,581 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
extern "system" {
#[must_use]
fn cublasLtCreate(
lightHandle: *mut cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
fn cublasLtDestroy(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublaslt::cublasStatus_t;
fn cublasLtGetStatusName(
status: cuda_types::cublaslt::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasLtGetStatusString(
status: cuda_types::cublaslt::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasLtGetVersion() -> usize;
fn cublasLtGetCudartVersion() -> usize;
#[must_use]
fn cublasLtGetProperty(
type_: cuda_types::cublaslt::libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
fn cublasLtHeuristicsCacheGetCapacity(
capacity: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
fn cublasLtHeuristicsCacheSetCapacity(
capacity: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
/** Restricts usage of CPU instructions (ISA) specified by the flags in the mask.
Flags can be combined with bitwise OR(|) operator. Supported flags:
- 0x1 -- x86-64 AVX512 ISA
Default mask: 0 (any applicable ISA is allowed).
The function returns the previous value of the mask.
The function takes precedence over the environment variable CUBLASLT_DISABLE_CPU_INSTRUCTIONS_MASK.*/
fn cublasLtDisableCpuInstructionsSetMask(
mask: ::core::ffi::c_uint,
) -> ::core::ffi::c_uint;
#[must_use]
/** Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
\retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized
\retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.
when workspaceSizeInBytes is less than workspace required by configured
algo
\retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured
operation
\retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device
\retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device
\retval CUBLAS_STATUS_SUCCESS if the operation completed successfully*/
fn cublasLtMatmul(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
computeDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
alpha: *const ::core::ffi::c_void,
A: *const ::core::ffi::c_void,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
B: *const ::core::ffi::c_void,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
beta: *const ::core::ffi::c_void,
C: *const ::core::ffi::c_void,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
D: *mut ::core::ffi::c_void,
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
workspace: *mut ::core::ffi::c_void,
workspaceSizeInBytes: usize,
stream: cuda_types::cublaslt::cudaStream_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
Can be used to change memory order of data or to scale and shift the values.
\retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized
\retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.
when A is not NULL, but Adesc is NULL
\retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured
operation
\retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device
\retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device
\retval CUBLAS_STATUS_SUCCESS if the operation completed successfully*/
fn cublasLtMatrixTransform(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
alpha: *const ::core::ffi::c_void,
A: *const ::core::ffi::c_void,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
beta: *const ::core::ffi::c_void,
B: *const ::core::ffi::c_void,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
C: *mut ::core::ffi::c_void,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
stream: cuda_types::cublaslt::cudaStream_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatrixLayoutInit_internal(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
size: usize,
type_: cuda_types::cublaslt::cudaDataType,
rows: u64,
cols: u64,
ld: i64,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matrix layout descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/
fn cublasLtMatrixLayoutCreate(
matLayout: *mut cuda_types::cublaslt::cublasLtMatrixLayout_t,
type_: cuda_types::cublaslt::cudaDataType,
rows: u64,
cols: u64,
ld: i64,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matrix layout descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatrixLayoutDestroy(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matrix layout descriptor attribute.
\param[in] matLayout The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatrixLayoutSetAttribute(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matrix layout descriptor attribute.
\param[in] matLayout The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatrixLayoutGetAttribute(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatmulDescInit_internal(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
size: usize,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matmul operation descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/
fn cublasLtMatmulDescCreate(
matmulDesc: *mut cuda_types::cublaslt::cublasLtMatmulDesc_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matmul operation descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatmulDescDestroy(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matmul operation descriptor attribute.
\param[in] matmulDesc The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatmulDescSetAttribute(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matmul operation descriptor attribute.
\param[in] matmulDesc The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulDescGetAttribute(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatrixTransformDescInit_internal(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
size: usize,
scaleType: cuda_types::cublaslt::cudaDataType,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matrix transform operation descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/
fn cublasLtMatrixTransformDescCreate(
transformDesc: *mut cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
scaleType: cuda_types::cublaslt::cudaDataType,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matrix transform operation descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatrixTransformDescDestroy(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matrix transform operation descriptor attribute.
\param[in] transformDesc The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatrixTransformDescSetAttribute(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matrix transform operation descriptor attribute.
\param[in] transformDesc The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number
of bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatrixTransformDescGetAttribute(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatmulPreferenceInit_internal(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
size: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matmul heuristic search preference descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/
fn cublasLtMatmulPreferenceCreate(
pref: *mut cuda_types::cublaslt::cublasLtMatmulPreference_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matmul heuristic search preference descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatmulPreferenceDestroy(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matmul heuristic search preference descriptor attribute.
\param[in] pref The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatmulPreferenceSetAttribute(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matmul heuristic search preference descriptor attribute.
\param[in] pref The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulPreferenceGetAttribute(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Query cublasLt heuristic for algorithm appropriate for given use case.
\param[in] lightHandle Pointer to the allocated cuBLASLt handle for the cuBLASLt
context. See cublasLtHandle_t.
\param[in] operationDesc Handle to the matrix multiplication descriptor.
\param[in] Adesc Handle to the layout descriptors for matrix A.
\param[in] Bdesc Handle to the layout descriptors for matrix B.
\param[in] Cdesc Handle to the layout descriptors for matrix C.
\param[in] Ddesc Handle to the layout descriptors for matrix D.
\param[in] preference Pointer to the structure holding the heuristic search
preferences descriptor. See cublasLtMatrixLayout_t.
\param[in] requestedAlgoCount Size of heuristicResultsArray (in elements) and requested
maximum number of algorithms to return.
\param[in, out] heuristicResultsArray Output algorithms and associated runtime characteristics,
ordered in increasing estimated compute time.
\param[out] returnAlgoCount The number of heuristicResultsArray elements written.
\retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero
\retval CUBLAS_STATUS_NOT_SUPPORTED if no heuristic function available for current configuration
\retval CUBLAS_STATUS_SUCCESS if query was successful, inspect
heuristicResultsArray[0 to (returnAlgoCount - 1)].state
for detail status of results*/
fn cublasLtMatmulAlgoGetHeuristic(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
operationDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
preference: cuda_types::cublaslt::cublasLtMatmulPreference_t,
requestedAlgoCount: ::core::ffi::c_int,
heuristicResultsArray: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
returnAlgoCount: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Routine to get all algo IDs that can potentially run
\param[in] int requestedAlgoCount requested number of algos (must be less or equal to size of algoIdsA
(in elements)) \param[out] algoIdsA array to write algoIds to \param[out] returnAlgoCount number of algoIds
actually written
\retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero
\retval CUBLAS_STATUS_SUCCESS if query was successful, inspect returnAlgoCount to get actual number of IDs
available*/
fn cublasLtMatmulAlgoGetIds(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
Atype: cuda_types::cublaslt::cudaDataType_t,
Btype: cuda_types::cublaslt::cudaDataType_t,
Ctype: cuda_types::cublaslt::cudaDataType_t,
Dtype: cuda_types::cublaslt::cudaDataType_t,
requestedAlgoCount: ::core::ffi::c_int,
algoIdsArray: *mut ::core::ffi::c_int,
returnAlgoCount: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Initialize algo structure
\retval CUBLAS_STATUS_INVALID_VALUE if algo is NULL or algoId is outside of recognized range
\retval CUBLAS_STATUS_NOT_SUPPORTED if algoId is not supported for given combination of data types
\retval CUBLAS_STATUS_SUCCESS if the structure was successfully initialized*/
fn cublasLtMatmulAlgoInit(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
Atype: cuda_types::cublaslt::cudaDataType_t,
Btype: cuda_types::cublaslt::cudaDataType_t,
Ctype: cuda_types::cublaslt::cudaDataType_t,
Dtype: cuda_types::cublaslt::cudaDataType_t,
algoId: ::core::ffi::c_int,
algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Check configured algo descriptor for correctness and support on current device.
Result includes required workspace size and calculated wave count.
CUBLAS_STATUS_SUCCESS doesn't fully guarantee algo will run (will fail if e.g. buffers are not correctly aligned);
but if cublasLtMatmulAlgoCheck fails, the algo will not run.
\param[in] algo algo configuration to check
\param[out] result result structure to report algo runtime characteristics; algo field is never updated
\retval CUBLAS_STATUS_INVALID_VALUE if matrix layout descriptors or operation descriptor don't match algo
descriptor
\retval CUBLAS_STATUS_NOT_SUPPORTED if algo configuration or data type combination is not currently supported on
given device
\retval CUBLAS_STATUS_ARCH_MISMATCH if algo configuration cannot be run using the selected device
\retval CUBLAS_STATUS_SUCCESS if check was successful*/
fn cublasLtMatmulAlgoCheck(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
operationDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
result: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get algo capability attribute.
E.g. to get list of supported Tile IDs:
cublasLtMatmulTile_t tiles[CUBLASLT_MATMUL_TILE_END];
size_t num_tiles, size_written;
if (cublasLtMatmulAlgoCapGetAttribute(algo, CUBLASLT_ALGO_CAP_TILE_IDS, tiles, sizeof(tiles), size_written) ==
CUBLAS_STATUS_SUCCESS) { num_tiles = size_written / sizeof(tiles[0]);
}
\param[in] algo The algo descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulAlgoCapGetAttribute(
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
attr: cuda_types::cublaslt::cublasLtMatmulAlgoCapAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set algo configuration attribute.
\param[in] algo The algo descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatmulAlgoConfigSetAttribute(
algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t,
attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get algo configuration attribute.
\param[in] algo The algo descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulAlgoConfigGetAttribute(
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Logger callback setter.
\param[in] callback a user defined callback function to be called by the logger
\retval CUBLAS_STATUS_SUCCESS if callback was set successfully*/
fn cublasLtLoggerSetCallback(
callback: cuda_types::cublaslt::cublasLtLoggerCallback_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Log file setter.
\param[in] file an open file with write permissions
\retval CUBLAS_STATUS_SUCCESS if log file was set successfully*/
fn cublasLtLoggerSetFile(file: *mut FILE) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Open log file.
\param[in] logFile log file path. if the log file does not exist, it will be created
\retval CUBLAS_STATUS_SUCCESS if log file was created successfully*/
fn cublasLtLoggerOpenFile(
logFile: *const ::core::ffi::c_char,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Log level setter.
\param[in] level log level, should be one of the following:
0. Off
1. Errors
2. Performance Trace
3. Performance Hints
4. Heuristics Trace
5. API Trace
\retval CUBLAS_STATUS_INVALID_VALUE if log level is not one of the above levels
\retval CUBLAS_STATUS_SUCCESS if log level was set successfully*/
fn cublasLtLoggerSetLevel(
level: ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Log mask setter.
\param[in] mask log mask, should be a combination of the following masks:
0. Off
1. Errors
2. Performance Trace
4. Performance Hints
8. Heuristics Trace
16. API Trace
\retval CUBLAS_STATUS_SUCCESS if log mask was set successfully*/
fn cublasLtLoggerSetMask(
mask: ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Disable logging for the entire session.
\retval CUBLAS_STATUS_SUCCESS if disabled logging*/
fn cublasLtLoggerForceDisable() -> cuda_types::cublaslt::cublasStatus_t;
}

File diff suppressed because it is too large Load Diff

2579
cuda_base/src/cudnn8.rs Normal file

File diff suppressed because it is too large Load Diff

2055
cuda_base/src/cudnn9.rs Normal file

File diff suppressed because it is too large Load Diff

368
cuda_base/src/cufft.rs Normal file
View File

@ -0,0 +1,368 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
extern "system" {
fn cufftPlan1d(
plan: *mut cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftPlan2d(
plan: *mut cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
) -> cuda_types::cufft::cufftResult;
fn cufftPlan3d(
plan: *mut cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
nz: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
) -> cuda_types::cufft::cufftResult;
fn cufftPlanMany(
plan: *mut cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_int,
inembed: *mut ::core::ffi::c_int,
istride: ::core::ffi::c_int,
idist: ::core::ffi::c_int,
onembed: *mut ::core::ffi::c_int,
ostride: ::core::ffi::c_int,
odist: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftMakePlan1d(
plan: cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftMakePlan2d(
plan: cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftMakePlan3d(
plan: cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
nz: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftMakePlanMany(
plan: cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_int,
inembed: *mut ::core::ffi::c_int,
istride: ::core::ffi::c_int,
idist: ::core::ffi::c_int,
onembed: *mut ::core::ffi::c_int,
ostride: ::core::ffi::c_int,
odist: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftMakePlanMany64(
plan: cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_longlong,
inembed: *mut ::core::ffi::c_longlong,
istride: ::core::ffi::c_longlong,
idist: ::core::ffi::c_longlong,
onembed: *mut ::core::ffi::c_longlong,
ostride: ::core::ffi::c_longlong,
odist: ::core::ffi::c_longlong,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_longlong,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftGetSizeMany64(
plan: cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_longlong,
inembed: *mut ::core::ffi::c_longlong,
istride: ::core::ffi::c_longlong,
idist: ::core::ffi::c_longlong,
onembed: *mut ::core::ffi::c_longlong,
ostride: ::core::ffi::c_longlong,
odist: ::core::ffi::c_longlong,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_longlong,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftEstimate1d(
nx: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftEstimate2d(
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftEstimate3d(
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
nz: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftEstimateMany(
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_int,
inembed: *mut ::core::ffi::c_int,
istride: ::core::ffi::c_int,
idist: ::core::ffi::c_int,
onembed: *mut ::core::ffi::c_int,
ostride: ::core::ffi::c_int,
odist: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftCreate(
handle: *mut cuda_types::cufft::cufftHandle,
) -> cuda_types::cufft::cufftResult;
fn cufftGetSize1d(
handle: cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftGetSize2d(
handle: cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftGetSize3d(
handle: cuda_types::cufft::cufftHandle,
nx: ::core::ffi::c_int,
ny: ::core::ffi::c_int,
nz: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftGetSizeMany(
handle: cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_int,
inembed: *mut ::core::ffi::c_int,
istride: ::core::ffi::c_int,
idist: ::core::ffi::c_int,
onembed: *mut ::core::ffi::c_int,
ostride: ::core::ffi::c_int,
odist: ::core::ffi::c_int,
type_: cuda_types::cufft::cufftType,
batch: ::core::ffi::c_int,
workArea: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftGetSize(
handle: cuda_types::cufft::cufftHandle,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
fn cufftSetWorkArea(
plan: cuda_types::cufft::cufftHandle,
workArea: *mut ::core::ffi::c_void,
) -> cuda_types::cufft::cufftResult;
fn cufftSetAutoAllocation(
plan: cuda_types::cufft::cufftHandle,
autoAllocate: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftExecC2C(
plan: cuda_types::cufft::cufftHandle,
idata: *mut cuda_types::cufft::cufftComplex,
odata: *mut cuda_types::cufft::cufftComplex,
direction: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftExecR2C(
plan: cuda_types::cufft::cufftHandle,
idata: *mut cuda_types::cufft::cufftReal,
odata: *mut cuda_types::cufft::cufftComplex,
) -> cuda_types::cufft::cufftResult;
fn cufftExecC2R(
plan: cuda_types::cufft::cufftHandle,
idata: *mut cuda_types::cufft::cufftComplex,
odata: *mut cuda_types::cufft::cufftReal,
) -> cuda_types::cufft::cufftResult;
fn cufftExecZ2Z(
plan: cuda_types::cufft::cufftHandle,
idata: *mut cuda_types::cufft::cufftDoubleComplex,
odata: *mut cuda_types::cufft::cufftDoubleComplex,
direction: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftExecD2Z(
plan: cuda_types::cufft::cufftHandle,
idata: *mut cuda_types::cufft::cufftDoubleReal,
odata: *mut cuda_types::cufft::cufftDoubleComplex,
) -> cuda_types::cufft::cufftResult;
fn cufftExecZ2D(
plan: cuda_types::cufft::cufftHandle,
idata: *mut cuda_types::cufft::cufftDoubleComplex,
odata: *mut cuda_types::cufft::cufftDoubleReal,
) -> cuda_types::cufft::cufftResult;
fn cufftSetStream(
plan: cuda_types::cufft::cufftHandle,
stream: cuda_types::cufft::cudaStream_t,
) -> cuda_types::cufft::cufftResult;
fn cufftDestroy(
plan: cuda_types::cufft::cufftHandle,
) -> cuda_types::cufft::cufftResult;
fn cufftGetVersion(
version: *mut ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftGetProperty(
type_: cuda_types::cufft::libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftSetPlanPropertyInt64(
plan: cuda_types::cufft::cufftHandle,
property: cuda_types::cufft::cufftProperty,
inputValueInt: ::core::ffi::c_longlong,
) -> cuda_types::cufft::cufftResult;
fn cufftGetPlanPropertyInt64(
plan: cuda_types::cufft::cufftHandle,
property: cuda_types::cufft::cufftProperty,
returnPtrValue: *mut ::core::ffi::c_longlong,
) -> cuda_types::cufft::cufftResult;
fn cufftResetPlanProperty(
plan: cuda_types::cufft::cufftHandle,
property: cuda_types::cufft::cufftProperty,
) -> cuda_types::cufft::cufftResult;
fn cufftXtSetGPUs(
handle: cuda_types::cufft::cufftHandle,
nGPUs: ::core::ffi::c_int,
whichGPUs: *mut ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftXtMalloc(
plan: cuda_types::cufft::cufftHandle,
descriptor: *mut *mut cuda_types::cufft::cudaLibXtDesc,
format: cuda_types::cufft::cufftXtSubFormat,
) -> cuda_types::cufft::cufftResult;
fn cufftXtMemcpy(
plan: cuda_types::cufft::cufftHandle,
dstPointer: *mut ::core::ffi::c_void,
srcPointer: *mut ::core::ffi::c_void,
type_: cuda_types::cufft::cufftXtCopyType,
) -> cuda_types::cufft::cufftResult;
fn cufftXtFree(
descriptor: *mut cuda_types::cufft::cudaLibXtDesc,
) -> cuda_types::cufft::cufftResult;
fn cufftXtSetWorkArea(
plan: cuda_types::cufft::cufftHandle,
workArea: *mut *mut ::core::ffi::c_void,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptorC2C(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
direction: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptorR2C(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptorC2R(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptorZ2Z(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
direction: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptorD2Z(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptorZ2D(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
) -> cuda_types::cufft::cufftResult;
fn cufftXtQueryPlan(
plan: cuda_types::cufft::cufftHandle,
queryStruct: *mut ::core::ffi::c_void,
queryType: cuda_types::cufft::cufftXtQueryType,
) -> cuda_types::cufft::cufftResult;
fn cufftXtSetCallback(
plan: cuda_types::cufft::cufftHandle,
callback_routine: *mut *mut ::core::ffi::c_void,
cbType: cuda_types::cufft::cufftXtCallbackType,
caller_info: *mut *mut ::core::ffi::c_void,
) -> cuda_types::cufft::cufftResult;
fn cufftXtClearCallback(
plan: cuda_types::cufft::cufftHandle,
cbType: cuda_types::cufft::cufftXtCallbackType,
) -> cuda_types::cufft::cufftResult;
fn cufftXtSetCallbackSharedSize(
plan: cuda_types::cufft::cufftHandle,
cbType: cuda_types::cufft::cufftXtCallbackType,
sharedSize: usize,
) -> cuda_types::cufft::cufftResult;
fn cufftXtMakePlanMany(
plan: cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_longlong,
inembed: *mut ::core::ffi::c_longlong,
istride: ::core::ffi::c_longlong,
idist: ::core::ffi::c_longlong,
inputtype: cuda_types::cufft::cudaDataType,
onembed: *mut ::core::ffi::c_longlong,
ostride: ::core::ffi::c_longlong,
odist: ::core::ffi::c_longlong,
outputtype: cuda_types::cufft::cudaDataType,
batch: ::core::ffi::c_longlong,
workSize: *mut usize,
executiontype: cuda_types::cufft::cudaDataType,
) -> cuda_types::cufft::cufftResult;
fn cufftXtGetSizeMany(
plan: cuda_types::cufft::cufftHandle,
rank: ::core::ffi::c_int,
n: *mut ::core::ffi::c_longlong,
inembed: *mut ::core::ffi::c_longlong,
istride: ::core::ffi::c_longlong,
idist: ::core::ffi::c_longlong,
inputtype: cuda_types::cufft::cudaDataType,
onembed: *mut ::core::ffi::c_longlong,
ostride: ::core::ffi::c_longlong,
odist: ::core::ffi::c_longlong,
outputtype: cuda_types::cufft::cudaDataType,
batch: ::core::ffi::c_longlong,
workSize: *mut usize,
executiontype: cuda_types::cufft::cudaDataType,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExec(
plan: cuda_types::cufft::cufftHandle,
input: *mut ::core::ffi::c_void,
output: *mut ::core::ffi::c_void,
direction: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftXtExecDescriptor(
plan: cuda_types::cufft::cufftHandle,
input: *mut cuda_types::cufft::cudaLibXtDesc,
output: *mut cuda_types::cufft::cudaLibXtDesc,
direction: ::core::ffi::c_int,
) -> cuda_types::cufft::cufftResult;
fn cufftXtSetWorkAreaPolicy(
plan: cuda_types::cufft::cufftHandle,
policy: cuda_types::cufft::cufftXtWorkAreaPolicy,
workSize: *mut usize,
) -> cuda_types::cufft::cufftResult;
}

5518
cuda_base/src/cusparse.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -15,6 +15,11 @@ use syn::{
const CUDA_RS: &'static str = include_str! {"cuda.rs"}; const CUDA_RS: &'static str = include_str! {"cuda.rs"};
const NVML_RS: &'static str = include_str! {"nvml.rs"}; const NVML_RS: &'static str = include_str! {"nvml.rs"};
const CUBLAS_RS: &'static str = include_str! {"cublas.rs"};
const CUBLASLT_RS: &'static str = include_str! {"cublaslt.rs"};
const CUFFT_RS: &'static str = include_str! {"cufft.rs"};
const CUSPARSE_RS: &'static str = include_str! {"cusparse.rs"};
const CUDNN9_RS: &'static str = include_str! {"cudnn9.rs"};
// This macro accepts following arguments: // This macro accepts following arguments:
// * `normal_macro`: ident for a normal macro // * `normal_macro`: ident for a normal macro
@ -35,6 +40,31 @@ pub fn cuda_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUDA_RS) function_declarations(tokens, CUDA_RS)
} }
#[proc_macro]
pub fn cublas_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUBLAS_RS)
}
#[proc_macro]
pub fn cublaslt_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUBLASLT_RS)
}
#[proc_macro]
pub fn cufft_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUFFT_RS)
}
#[proc_macro]
pub fn cusparse_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUSPARSE_RS)
}
#[proc_macro]
pub fn cudnn9_function_declarations(tokens: TokenStream) -> TokenStream {
function_declarations(tokens, CUDNN9_RS)
}
fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream { fn function_declarations(tokens: TokenStream, module: &str) -> TokenStream {
let input = parse_macro_input!(tokens as FnDeclInput); let input = parse_macro_input!(tokens as FnDeclInput);
let mut cuda_module = syn::parse_str::<File>(module).unwrap(); let mut cuda_module = syn::parse_str::<File>(module).unwrap();

File diff suppressed because it is too large Load Diff

324
cuda_types/src/cublas.rs Normal file
View File

@ -0,0 +1,324 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub type __half = u16;
pub type __nv_bfloat16 = u16;
pub use super::cuda::cuComplex;
pub use super::cuda::cuDoubleComplex;
pub use super::cuda::cudaDataType;
pub use super::cuda::cudaDataType_t;
pub type cudaStream_t = super::cuda::CUstream;
pub use super::cuda::libraryPropertyType;
pub type cudaGraphExecUpdateResultInfo_st = super::cuda::CUgraphExecUpdateResultInfo_st;
pub type cudaAsyncNotificationType = super::cuda::CUasyncNotificationType_enum;
pub type cudaGraph_t = super::cuda::CUgraph;
pub const CUBLAS_VER_MAJOR: u32 = 12;
pub const CUBLAS_VER_MINOR: u32 = 8;
pub const CUBLAS_VER_PATCH: u32 = 4;
pub const CUBLAS_VER_BUILD: u32 = 1;
pub const CUBLAS_VERSION: u32 = 120804;
impl cublasStatus_t {
pub const CUBLAS_STATUS_SUCCESS: cublasStatus_t = cublasStatus_t(0);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_NOT_INITIALIZED: cublasStatus_t = cublasStatus_t(1);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_ALLOC_FAILED: cublasStatus_t = cublasStatus_t(3);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_INVALID_VALUE: cublasStatus_t = cublasStatus_t(7);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_ARCH_MISMATCH: cublasStatus_t = cublasStatus_t(8);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_MAPPING_ERROR: cublasStatus_t = cublasStatus_t(11);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_EXECUTION_FAILED: cublasStatus_t = cublasStatus_t(13);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_INTERNAL_ERROR: cublasStatus_t = cublasStatus_t(14);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_NOT_SUPPORTED: cublasStatus_t = cublasStatus_t(15);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_LICENSE_ERROR: cublasStatus_t = cublasStatus_t(16);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasStatus_t(pub ::core::ffi::c_uint);
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_LOWER: cublasFillMode_t = cublasFillMode_t(0);
}
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_UPPER: cublasFillMode_t = cublasFillMode_t(1);
}
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_FULL: cublasFillMode_t = cublasFillMode_t(2);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasFillMode_t(pub ::core::ffi::c_uint);
impl cublasDiagType_t {
pub const CUBLAS_DIAG_NON_UNIT: cublasDiagType_t = cublasDiagType_t(0);
}
impl cublasDiagType_t {
pub const CUBLAS_DIAG_UNIT: cublasDiagType_t = cublasDiagType_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasDiagType_t(pub ::core::ffi::c_uint);
impl cublasSideMode_t {
pub const CUBLAS_SIDE_LEFT: cublasSideMode_t = cublasSideMode_t(0);
}
impl cublasSideMode_t {
pub const CUBLAS_SIDE_RIGHT: cublasSideMode_t = cublasSideMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasSideMode_t(pub ::core::ffi::c_uint);
impl cublasOperation_t {
pub const CUBLAS_OP_N: cublasOperation_t = cublasOperation_t(0);
}
impl cublasOperation_t {
pub const CUBLAS_OP_T: cublasOperation_t = cublasOperation_t(1);
}
impl cublasOperation_t {
pub const CUBLAS_OP_C: cublasOperation_t = cublasOperation_t(2);
}
impl cublasOperation_t {
pub const CUBLAS_OP_HERMITAN: cublasOperation_t = cublasOperation_t(2);
}
impl cublasOperation_t {
pub const CUBLAS_OP_CONJG: cublasOperation_t = cublasOperation_t(3);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasOperation_t(pub ::core::ffi::c_uint);
impl cublasPointerMode_t {
pub const CUBLAS_POINTER_MODE_HOST: cublasPointerMode_t = cublasPointerMode_t(0);
}
impl cublasPointerMode_t {
pub const CUBLAS_POINTER_MODE_DEVICE: cublasPointerMode_t = cublasPointerMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasPointerMode_t(pub ::core::ffi::c_uint);
impl cublasAtomicsMode_t {
pub const CUBLAS_ATOMICS_NOT_ALLOWED: cublasAtomicsMode_t = cublasAtomicsMode_t(0);
}
impl cublasAtomicsMode_t {
pub const CUBLAS_ATOMICS_ALLOWED: cublasAtomicsMode_t = cublasAtomicsMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasAtomicsMode_t(pub ::core::ffi::c_uint);
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DFALT: cublasGemmAlgo_t = cublasGemmAlgo_t(-1);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DEFAULT: cublasGemmAlgo_t = cublasGemmAlgo_t(-1);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO0: cublasGemmAlgo_t = cublasGemmAlgo_t(0);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO1: cublasGemmAlgo_t = cublasGemmAlgo_t(1);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO2: cublasGemmAlgo_t = cublasGemmAlgo_t(2);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO3: cublasGemmAlgo_t = cublasGemmAlgo_t(3);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO4: cublasGemmAlgo_t = cublasGemmAlgo_t(4);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO5: cublasGemmAlgo_t = cublasGemmAlgo_t(5);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO6: cublasGemmAlgo_t = cublasGemmAlgo_t(6);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO7: cublasGemmAlgo_t = cublasGemmAlgo_t(7);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO8: cublasGemmAlgo_t = cublasGemmAlgo_t(8);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO9: cublasGemmAlgo_t = cublasGemmAlgo_t(9);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO10: cublasGemmAlgo_t = cublasGemmAlgo_t(10);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO11: cublasGemmAlgo_t = cublasGemmAlgo_t(11);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO12: cublasGemmAlgo_t = cublasGemmAlgo_t(12);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO13: cublasGemmAlgo_t = cublasGemmAlgo_t(13);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO14: cublasGemmAlgo_t = cublasGemmAlgo_t(14);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO15: cublasGemmAlgo_t = cublasGemmAlgo_t(15);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO16: cublasGemmAlgo_t = cublasGemmAlgo_t(16);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO17: cublasGemmAlgo_t = cublasGemmAlgo_t(17);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO18: cublasGemmAlgo_t = cublasGemmAlgo_t(18);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO19: cublasGemmAlgo_t = cublasGemmAlgo_t(19);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO20: cublasGemmAlgo_t = cublasGemmAlgo_t(20);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO21: cublasGemmAlgo_t = cublasGemmAlgo_t(21);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO22: cublasGemmAlgo_t = cublasGemmAlgo_t(22);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO23: cublasGemmAlgo_t = cublasGemmAlgo_t(23);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DEFAULT_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(99);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DFALT_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(99);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO0_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(100);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO1_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(101);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO2_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(102);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO3_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(103);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO4_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(104);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO5_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(105);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO6_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(106);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO7_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(107);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO8_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(108);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO9_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(109);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO10_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(110);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO11_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(111);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO12_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(112);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO13_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(113);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO14_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(114);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO15_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(115);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasGemmAlgo_t(pub ::core::ffi::c_int);
impl cublasMath_t {
pub const CUBLAS_DEFAULT_MATH: cublasMath_t = cublasMath_t(0);
}
impl cublasMath_t {
pub const CUBLAS_TENSOR_OP_MATH: cublasMath_t = cublasMath_t(1);
}
impl cublasMath_t {
pub const CUBLAS_PEDANTIC_MATH: cublasMath_t = cublasMath_t(2);
}
impl cublasMath_t {
pub const CUBLAS_TF32_TENSOR_OP_MATH: cublasMath_t = cublasMath_t(3);
}
impl cublasMath_t {
pub const CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION: cublasMath_t = cublasMath_t(
16,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasMath_t(pub ::core::ffi::c_uint);
pub use super::cuda::cudaDataType as cublasDataType_t;
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_16F: cublasComputeType_t = cublasComputeType_t(64);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_16F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(65);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F: cublasComputeType_t = cublasComputeType_t(68);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(69);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_FAST_16F: cublasComputeType_t = cublasComputeType_t(74);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_FAST_16BF: cublasComputeType_t = cublasComputeType_t(
75,
);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_FAST_TF32: cublasComputeType_t = cublasComputeType_t(
77,
);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_64F: cublasComputeType_t = cublasComputeType_t(70);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_64F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(71);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32I: cublasComputeType_t = cublasComputeType_t(72);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32I_PEDANTIC: cublasComputeType_t = cublasComputeType_t(73);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasComputeType_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cublasContext {
_unused: [u8; 0],
}
pub type cublasHandle_t = *mut cublasContext;
pub type cublasLogCallback = ::core::option::Option<
unsafe extern "C" fn(msg: *const ::core::ffi::c_char),
>;

5387
cuda_types/src/cublaslt.rs Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1478
cuda_types/src/cudnn.rs Normal file

File diff suppressed because it is too large Load Diff

576
cuda_types/src/cudnn8.rs Normal file
View File

@ -0,0 +1,576 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub type __half = u16;
pub type __nv_bfloat16 = u16;
pub use super::cuda::cuComplex;
pub use super::cuda::cuDoubleComplex;
pub use super::cuda::cudaDataType;
pub use super::cuda::cudaDataType_t;
pub type cudaStream_t = super::cuda::CUstream;
pub use super::cuda::libraryPropertyType;
pub type cudaGraphExecUpdateResultInfo_st = super::cuda::CUgraphExecUpdateResultInfo_st;
pub type cudaAsyncNotificationType = super::cuda::CUasyncNotificationType_enum;
pub type cudaGraph_t = super::cuda::CUgraph;
pub const CUDNN_MAJOR: u32 = 8;
pub const CUDNN_MINOR: u32 = 9;
pub const CUDNN_PATCHLEVEL: u32 = 7;
pub const CUDNN_VERSION: u32 = 8907;
pub const CUDNN_MAX_SM_MAJOR_NUMBER: u32 = 9;
pub const CUDNN_MAX_SM_MINOR_NUMBER: u32 = 0;
pub const CUDNN_MAX_DEVICE_VERSION: u32 = 900;
pub const CUDNN_SM_50: u32 = 500;
pub const CUDNN_SM_52: u32 = 520;
pub const CUDNN_SM_53: u32 = 530;
pub const CUDNN_SM_60: u32 = 600;
pub const CUDNN_SM_61: u32 = 610;
pub const CUDNN_SM_62: u32 = 620;
pub const CUDNN_SM_70: u32 = 700;
pub const CUDNN_SM_72: u32 = 720;
pub const CUDNN_SM_75: u32 = 750;
pub const CUDNN_SM_80: u32 = 800;
pub const CUDNN_SM_86: u32 = 860;
pub const CUDNN_SM_87: u32 = 870;
pub const CUDNN_SM_89: u32 = 890;
pub const CUDNN_SM_90: u32 = 900;
pub const CUDNN_SM_9X_END: u32 = 999;
pub const CUDNN_MIN_DEVICE_VERSION: u32 = 500;
pub const CUDNN_OPS_INFER_MAJOR: u32 = 8;
pub const CUDNN_OPS_INFER_MINOR: u32 = 9;
pub const CUDNN_OPS_INFER_PATCH: u32 = 7;
pub const CUDNN_DIM_MAX: u32 = 8;
pub const CUDNN_LRN_MIN_N: u32 = 1;
pub const CUDNN_LRN_MAX_N: u32 = 16;
pub const CUDNN_LRN_MIN_K: f64 = 0.00001;
pub const CUDNN_LRN_MIN_BETA: f64 = 0.01;
pub const CUDNN_BN_MIN_EPSILON: f64 = 0.0;
pub const CUDNN_OPS_TRAIN_MAJOR: u32 = 8;
pub const CUDNN_OPS_TRAIN_MINOR: u32 = 9;
pub const CUDNN_OPS_TRAIN_PATCH: u32 = 7;
pub const CUDNN_ADV_INFER_MAJOR: u32 = 8;
pub const CUDNN_ADV_INFER_MINOR: u32 = 9;
pub const CUDNN_ADV_INFER_PATCH: u32 = 7;
pub const CUDNN_RNN_PADDED_IO_DISABLED: u32 = 0;
pub const CUDNN_RNN_PADDED_IO_ENABLED: u32 = 1;
pub const CUDNN_SEQDATA_DIM_COUNT: u32 = 4;
pub const CUDNN_ATTN_QUERYMAP_ALL_TO_ONE: u32 = 0;
pub const CUDNN_ATTN_QUERYMAP_ONE_TO_ONE: u32 = 1;
pub const CUDNN_ATTN_DISABLE_PROJ_BIASES: u32 = 0;
pub const CUDNN_ATTN_ENABLE_PROJ_BIASES: u32 = 2;
pub const CUDNN_ATTN_WKIND_COUNT: u32 = 8;
pub const CUDNN_ADV_TRAIN_MAJOR: u32 = 8;
pub const CUDNN_ADV_TRAIN_MINOR: u32 = 9;
pub const CUDNN_ADV_TRAIN_PATCH: u32 = 7;
pub const CUDNN_CNN_INFER_MAJOR: u32 = 8;
pub const CUDNN_CNN_INFER_MINOR: u32 = 9;
pub const CUDNN_CNN_INFER_PATCH: u32 = 7;
pub const CUDNN_CNN_TRAIN_MAJOR: u32 = 8;
pub const CUDNN_CNN_TRAIN_MINOR: u32 = 9;
pub const CUDNN_CNN_TRAIN_PATCH: u32 = 7;
pub use super::cudnn::cudnnContext;
pub type cudnnHandle_t = *mut cudnnContext;
impl cudnnStatus_t {
pub const CUDNN_STATUS_SUCCESS: cudnnStatus_t = cudnnStatus_t(0);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_NOT_INITIALIZED: cudnnStatus_t = cudnnStatus_t(1);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_ALLOC_FAILED: cudnnStatus_t = cudnnStatus_t(2);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_BAD_PARAM: cudnnStatus_t = cudnnStatus_t(3);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_INTERNAL_ERROR: cudnnStatus_t = cudnnStatus_t(4);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_INVALID_VALUE: cudnnStatus_t = cudnnStatus_t(5);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_ARCH_MISMATCH: cudnnStatus_t = cudnnStatus_t(6);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_MAPPING_ERROR: cudnnStatus_t = cudnnStatus_t(7);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_EXECUTION_FAILED: cudnnStatus_t = cudnnStatus_t(8);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_NOT_SUPPORTED: cudnnStatus_t = cudnnStatus_t(9);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_LICENSE_ERROR: cudnnStatus_t = cudnnStatus_t(10);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING: cudnnStatus_t = cudnnStatus_t(
11,
);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_RUNTIME_IN_PROGRESS: cudnnStatus_t = cudnnStatus_t(12);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_RUNTIME_FP_OVERFLOW: cudnnStatus_t = cudnnStatus_t(13);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_VERSION_MISMATCH: cudnnStatus_t = cudnnStatus_t(14);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnStatus_t(pub ::core::ffi::c_uint);
pub use super::cudnn::cudnnRuntimeTag_t;
pub use super::cudnn::cudnnErrQueryMode_t;
pub use super::cudnn::cudnnTensorStruct;
pub type cudnnTensorDescriptor_t = *mut cudnnTensorStruct;
pub use super::cudnn::cudnnPoolingStruct;
pub type cudnnPoolingDescriptor_t = *mut cudnnPoolingStruct;
pub use super::cudnn::cudnnFilterStruct;
pub type cudnnFilterDescriptor_t = *mut cudnnFilterStruct;
pub use super::cudnn::cudnnLRNStruct;
pub type cudnnLRNDescriptor_t = *mut cudnnLRNStruct;
pub use super::cudnn::cudnnActivationStruct;
pub type cudnnActivationDescriptor_t = *mut cudnnActivationStruct;
pub use super::cudnn::cudnnSpatialTransformerStruct;
pub type cudnnSpatialTransformerDescriptor_t = *mut cudnnSpatialTransformerStruct;
pub use super::cudnn::cudnnOpTensorStruct;
pub type cudnnOpTensorDescriptor_t = *mut cudnnOpTensorStruct;
pub use super::cudnn::cudnnReduceTensorStruct;
pub type cudnnReduceTensorDescriptor_t = *mut cudnnReduceTensorStruct;
pub use super::cudnn::cudnnCTCLossStruct;
pub type cudnnCTCLossDescriptor_t = *mut cudnnCTCLossStruct;
pub use super::cudnn::cudnnTensorTransformStruct;
pub type cudnnTensorTransformDescriptor_t = *mut cudnnTensorTransformStruct;
pub use super::cudnn9::cudnnDataType_t;
pub use super::cudnn::cudnnMathType_t;
pub use super::cudnn::cudnnNanPropagation_t;
pub use super::cudnn::cudnnDeterminism_t;
pub use super::cudnn::cudnnTensorFormat_t;
pub use super::cudnn::cudnnFoldingDirection_t;
pub use super::cudnn::cudnnOpTensorOp_t;
pub use super::cudnn::cudnnReduceTensorOp_t;
pub use super::cudnn::cudnnReduceTensorIndices_t;
pub use super::cudnn::cudnnIndicesType_t;
pub use super::cudnn::cudnnSoftmaxAlgorithm_t;
pub use super::cudnn::cudnnSoftmaxMode_t;
pub use super::cudnn::cudnnPoolingMode_t;
pub use super::cudnn::cudnnActivationMode_t;
pub use super::cudnn::cudnnLRNMode_t;
pub use super::cudnn::cudnnDivNormMode_t;
pub use super::cudnn::cudnnBatchNormMode_t;
pub use super::cudnn::cudnnBatchNormOps_t;
pub use super::cudnn::cudnnNormMode_t;
pub use super::cudnn::cudnnNormAlgo_t;
pub use super::cudnn::cudnnNormOps_t;
pub use super::cudnn::cudnnSamplerType_t;
pub use super::cudnn::cudnnDropoutStruct;
pub type cudnnDropoutDescriptor_t = *mut cudnnDropoutStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cudnnAlgorithmStruct {
_unused: [u8; 0],
}
pub type cudnnAlgorithmDescriptor_t = *mut cudnnAlgorithmStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cudnnAlgorithmPerformanceStruct {
_unused: [u8; 0],
}
pub type cudnnAlgorithmPerformance_t = *mut cudnnAlgorithmPerformanceStruct;
pub use super::cudnn::cudnnConvolutionFwdAlgo_t;
pub use super::cudnn::cudnnConvolutionBwdFilterAlgo_t;
pub use super::cudnn::cudnnConvolutionBwdDataAlgo_t;
pub use super::cudnn::cudnnRNNAlgo_t;
pub use super::cudnn::cudnnCTCLossAlgo_t;
#[repr(C)]
#[derive(Copy, Clone)]
pub struct cudnnAlgorithmUnionStruct {
pub algo: cudnnAlgorithmUnionStruct_Algorithm,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union cudnnAlgorithmUnionStruct_Algorithm {
pub convFwdAlgo: cudnnConvolutionFwdAlgo_t,
pub convBwdFilterAlgo: cudnnConvolutionBwdFilterAlgo_t,
pub convBwdDataAlgo: cudnnConvolutionBwdDataAlgo_t,
pub RNNAlgo: cudnnRNNAlgo_t,
pub CTCLossAlgo: cudnnCTCLossAlgo_t,
}
pub type cudnnAlgorithm_t = cudnnAlgorithmUnionStruct;
pub use super::cudnn::cudnnSeverity_t;
#[repr(C)]
pub struct cudnnDebugStruct {
pub cudnn_version: ::core::ffi::c_uint,
pub cudnnStatus: cudnnStatus_t,
pub time_sec: ::core::ffi::c_uint,
pub time_usec: ::core::ffi::c_uint,
pub time_delta: ::core::ffi::c_uint,
pub handle: cudnnHandle_t,
pub stream: cudaStream_t,
pub pid: ::core::ffi::c_ulonglong,
pub tid: ::core::ffi::c_ulonglong,
pub cudaDeviceId: ::core::ffi::c_int,
pub reserved: [::core::ffi::c_int; 15usize],
}
pub type cudnnDebug_t = cudnnDebugStruct;
pub type cudnnCallback_t = ::core::option::Option<
unsafe extern "C" fn(
sev: cudnnSeverity_t,
udata: *mut ::core::ffi::c_void,
dbg: *const cudnnDebug_t,
msg: *const ::core::ffi::c_char,
),
>;
pub use super::cudnn::cudnnForwardMode_t;
pub use super::cudnn::cudnnRNNMode_t;
pub use super::cudnn::cudnnRNNBiasMode_t;
pub use super::cudnn::cudnnDirectionMode_t;
pub use super::cudnn::cudnnRNNInputMode_t;
pub use super::cudnn::cudnnRNNClipMode_t;
pub use super::cudnn::cudnnRNNDataLayout_t;
pub type cudnnRNNPaddingMode_t = ::core::ffi::c_uint;
pub use super::cudnn::cudnnRNNStruct;
pub type cudnnRNNDescriptor_t = *mut cudnnRNNStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cudnnPersistentRNNPlan {
_unused: [u8; 0],
}
pub type cudnnPersistentRNNPlan_t = *mut cudnnPersistentRNNPlan;
pub use super::cudnn::cudnnRNNDataStruct;
pub type cudnnRNNDataDescriptor_t = *mut cudnnRNNDataStruct;
pub use super::cudnn::cudnnSeqDataAxis_t;
pub use super::cudnn::cudnnSeqDataStruct;
pub type cudnnSeqDataDescriptor_t = *mut cudnnSeqDataStruct;
pub type cudnnAttnQueryMap_t = ::core::ffi::c_uint;
pub use super::cudnn::cudnnAttnStruct;
pub type cudnnAttnDescriptor_t = *mut cudnnAttnStruct;
pub use super::cudnn::cudnnMultiHeadAttnWeightKind_t;
pub use super::cudnn::cudnnWgradMode_t;
pub use super::cudnn::cudnnLossNormalizationMode_t;
pub use super::cudnn::cudnnConvolutionStruct;
pub type cudnnConvolutionDescriptor_t = *mut cudnnConvolutionStruct;
pub use super::cudnn::cudnnConvolutionMode_t;
pub use super::cudnn::cudnnReorderType_t;
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct cudnnConvolutionFwdAlgoPerfStruct {
pub algo: cudnnConvolutionFwdAlgo_t,
pub status: cudnnStatus_t,
pub time: f32,
pub memory: usize,
pub determinism: cudnnDeterminism_t,
pub mathType: cudnnMathType_t,
pub reserved: [::core::ffi::c_int; 3usize],
}
pub type cudnnConvolutionFwdAlgoPerf_t = cudnnConvolutionFwdAlgoPerfStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct cudnnConvolutionBwdDataAlgoPerfStruct {
pub algo: cudnnConvolutionBwdDataAlgo_t,
pub status: cudnnStatus_t,
pub time: f32,
pub memory: usize,
pub determinism: cudnnDeterminism_t,
pub mathType: cudnnMathType_t,
pub reserved: [::core::ffi::c_int; 3usize],
}
pub type cudnnConvolutionBwdDataAlgoPerf_t = cudnnConvolutionBwdDataAlgoPerfStruct;
pub use super::cudnn::cudnnFusedOpsConstParamStruct;
pub type cudnnFusedOpsConstParamPack_t = *mut cudnnFusedOpsConstParamStruct;
pub use super::cudnn::cudnnFusedOpsVariantParamStruct;
pub type cudnnFusedOpsVariantParamPack_t = *mut cudnnFusedOpsVariantParamStruct;
pub use super::cudnn::cudnnFusedOpsPlanStruct;
pub type cudnnFusedOpsPlan_t = *mut cudnnFusedOpsPlanStruct;
pub use super::cudnn::cudnnFusedOps_t;
pub use super::cudnn::cudnnFusedOpsConstParamLabel_t;
pub use super::cudnn::cudnnFusedOpsPointerPlaceHolder_t;
pub use super::cudnn::cudnnFusedOpsVariantParamLabel_t;
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct cudnnConvolutionBwdFilterAlgoPerfStruct {
pub algo: cudnnConvolutionBwdFilterAlgo_t,
pub status: cudnnStatus_t,
pub time: f32,
pub memory: usize,
pub determinism: cudnnDeterminism_t,
pub mathType: cudnnMathType_t,
pub reserved: [::core::ffi::c_int; 3usize],
}
pub type cudnnConvolutionBwdFilterAlgoPerf_t = cudnnConvolutionBwdFilterAlgoPerfStruct;
pub type cudnnBackendDescriptor_t = *mut ::core::ffi::c_void;
pub use super::cudnn::cudnnFractionStruct;
pub type cudnnFraction_t = cudnnFractionStruct;
pub use super::cudnn9::cudnnPointwiseMode_t;
pub use super::cudnn::cudnnResampleMode_t;
pub use super::cudnn::cudnnSignalMode_t;
pub use super::cudnn::cudnnGenStatsMode_t;
pub use super::cudnn::cudnnBnFinalizeStatsMode_t;
pub use super::cudnn::cudnnRngDistribution_t;
pub use super::cudnn9::cudnnBackendAttributeName_t;
pub use super::cudnn::cudnnBackendAttributeType_t;
pub use super::cudnn9::cudnnBackendDescriptorType_t;
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_TENSOR_CORE: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
0,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
1,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
2,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_FFT: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
3,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
4,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
5,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
6,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
7,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
8,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_TYPE_COUNT: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
9,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnBackendNumericalNote_t(pub ::core::ffi::c_uint);
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
0,
);
}
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
1,
);
}
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
2,
);
}
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_TYPE_COUNT: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
3,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnBackendBehaviorNote_t(pub ::core::ffi::c_uint);
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_K: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
0,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SWIZZLE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
1,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_SIZE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
2,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_USE_TEX: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
3,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_EDGE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(4);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_KBLOCK: cudnnBackendKnobType_t = cudnnBackendKnobType_t(5);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LDGA: cudnnBackendKnobType_t = cudnnBackendKnobType_t(6);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LDGB: cudnnBackendKnobType_t = cudnnBackendKnobType_t(7);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_CHUNK_K: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
8,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_H: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
9,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_WINO_TILE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
10,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_MULTIPLY: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
11,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_K_BUF: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
12,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILEK: cudnnBackendKnobType_t = cudnnBackendKnobType_t(13);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_STAGES: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
14,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_REDUCTION_MODE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
15,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
16,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_K_SLC: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
17,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_IDX_MODE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
18,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SLICED: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
19,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_RS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
20,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SINGLEBUFFER: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
21,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LDGC: cudnnBackendKnobType_t = cudnnBackendKnobType_t(22);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPECFILT: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
23,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_KERNEL_CFG: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
24,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_WORKSPACE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
25,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_CGA: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
26,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_CGA_M: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
27,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_CGA_N: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
28,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_BLOCK_SIZE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
29,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_OCCUPANCY: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
30,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
31,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
32,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_COLS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
33,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_ROWS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
34,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_COLS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
35,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LOAD_SIZE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
36,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_COUNTS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
37,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnBackendKnobType_t(pub ::core::ffi::c_uint);
pub use super::cudnn::cudnnBackendLayoutType_t;
pub use super::cudnn::cudnnBackendHeurMode_t;
pub use super::cudnn9::cudnnBackendTensorReordering_t;
pub use super::cudnn::cudnnPaddingMode_t;
pub use super::cudnn9::cudnnBackendNormMode_t;
pub use super::cudnn::cudnnBackendNormFwdPhase_t;

2404
cuda_types/src/cudnn9.rs Normal file

File diff suppressed because it is too large Load Diff

427
cuda_types/src/cufft.rs Normal file
View File

@ -0,0 +1,427 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub type __half = u16;
pub type __nv_bfloat16 = u16;
pub use super::cuda::cuComplex;
pub use super::cuda::cuDoubleComplex;
pub use super::cuda::cudaDataType;
pub use super::cuda::cudaDataType_t;
pub type cudaStream_t = super::cuda::CUstream;
pub use super::cuda::libraryPropertyType;
pub type cudaGraphExecUpdateResultInfo_st = super::cuda::CUgraphExecUpdateResultInfo_st;
pub type cudaAsyncNotificationType = super::cuda::CUasyncNotificationType_enum;
pub type cudaGraph_t = super::cuda::CUgraph;
pub const CUFFT_VER_MAJOR: u32 = 11;
pub const CUFFT_VER_MINOR: u32 = 3;
pub const CUFFT_VER_PATCH: u32 = 3;
pub const CUFFT_VER_BUILD: u32 = 83;
pub const CUFFT_VERSION: u32 = 11303;
pub const CUFFT_FORWARD: i32 = -1;
pub const CUFFT_INVERSE: u32 = 1;
impl libFormat_t {
pub const LIB_FORMAT_CUFFT: libFormat_t = libFormat_t(0);
}
impl libFormat_t {
pub const LIB_FORMAT_UNDEFINED: libFormat_t = libFormat_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct libFormat_t(pub ::core::ffi::c_uint);
pub use self::libFormat_t as libFormat;
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudaXtDesc_t {
pub version: ::core::ffi::c_int,
pub nGPUs: ::core::ffi::c_int,
pub GPUs: [::core::ffi::c_int; 64usize],
pub data: [*mut ::core::ffi::c_void; 64usize],
pub size: [usize; 64usize],
pub cudaXtState: *mut ::core::ffi::c_void,
}
pub type cudaXtDesc = cudaXtDesc_t;
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudaLibXtDesc_t {
pub version: ::core::ffi::c_int,
pub descriptor: *mut cudaXtDesc,
pub library: libFormat,
pub subFormat: ::core::ffi::c_int,
pub libDescriptor: *mut ::core::ffi::c_void,
}
pub type cudaLibXtDesc = cudaLibXtDesc_t;
impl cufftResult_t {
pub const CUFFT_SUCCESS: cufftResult_t = cufftResult_t(0);
}
impl cufftResult_t {
pub const CUFFT_INVALID_PLAN: cufftResult_t = cufftResult_t(1);
}
impl cufftResult_t {
pub const CUFFT_ALLOC_FAILED: cufftResult_t = cufftResult_t(2);
}
impl cufftResult_t {
pub const CUFFT_INVALID_TYPE: cufftResult_t = cufftResult_t(3);
}
impl cufftResult_t {
pub const CUFFT_INVALID_VALUE: cufftResult_t = cufftResult_t(4);
}
impl cufftResult_t {
pub const CUFFT_INTERNAL_ERROR: cufftResult_t = cufftResult_t(5);
}
impl cufftResult_t {
pub const CUFFT_EXEC_FAILED: cufftResult_t = cufftResult_t(6);
}
impl cufftResult_t {
pub const CUFFT_SETUP_FAILED: cufftResult_t = cufftResult_t(7);
}
impl cufftResult_t {
pub const CUFFT_INVALID_SIZE: cufftResult_t = cufftResult_t(8);
}
impl cufftResult_t {
pub const CUFFT_UNALIGNED_DATA: cufftResult_t = cufftResult_t(9);
}
impl cufftResult_t {
pub const CUFFT_INCOMPLETE_PARAMETER_LIST: cufftResult_t = cufftResult_t(10);
}
impl cufftResult_t {
pub const CUFFT_INVALID_DEVICE: cufftResult_t = cufftResult_t(11);
}
impl cufftResult_t {
pub const CUFFT_PARSE_ERROR: cufftResult_t = cufftResult_t(12);
}
impl cufftResult_t {
pub const CUFFT_NO_WORKSPACE: cufftResult_t = cufftResult_t(13);
}
impl cufftResult_t {
pub const CUFFT_NOT_IMPLEMENTED: cufftResult_t = cufftResult_t(14);
}
impl cufftResult_t {
pub const CUFFT_LICENSE_ERROR: cufftResult_t = cufftResult_t(15);
}
impl cufftResult_t {
pub const CUFFT_NOT_SUPPORTED: cufftResult_t = cufftResult_t(16);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftResult_t(pub ::core::ffi::c_uint);
pub use self::cufftResult_t as cufftResult;
pub type cufftReal = f32;
pub type cufftDoubleReal = f64;
pub type cufftComplex = super::cuda::cuComplex;
pub type cufftDoubleComplex = super::cuda::cuDoubleComplex;
impl cufftType_t {
pub const CUFFT_R2C: cufftType_t = cufftType_t(42);
}
impl cufftType_t {
pub const CUFFT_C2R: cufftType_t = cufftType_t(44);
}
impl cufftType_t {
pub const CUFFT_C2C: cufftType_t = cufftType_t(41);
}
impl cufftType_t {
pub const CUFFT_D2Z: cufftType_t = cufftType_t(106);
}
impl cufftType_t {
pub const CUFFT_Z2D: cufftType_t = cufftType_t(108);
}
impl cufftType_t {
pub const CUFFT_Z2Z: cufftType_t = cufftType_t(105);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftType_t(pub ::core::ffi::c_uint);
pub use self::cufftType_t as cufftType;
impl cufftCompatibility_t {
pub const CUFFT_COMPATIBILITY_FFTW_PADDING: cufftCompatibility_t = cufftCompatibility_t(
1,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftCompatibility_t(pub ::core::ffi::c_uint);
pub use self::cufftCompatibility_t as cufftCompatibility;
pub type cufftHandle = ::core::ffi::c_int;
impl cufftProperty_t {
pub const NVFFT_PLAN_PROPERTY_INT64_PATIENT_JIT: cufftProperty_t = cufftProperty_t(
1,
);
}
impl cufftProperty_t {
pub const NVFFT_PLAN_PROPERTY_INT64_MAX_NUM_HOST_THREADS: cufftProperty_t = cufftProperty_t(
2,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftProperty_t(pub ::core::ffi::c_uint);
pub use self::cufftProperty_t as cufftProperty;
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_INPUT: cufftXtSubFormat_t = cufftXtSubFormat_t(0);
}
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_OUTPUT: cufftXtSubFormat_t = cufftXtSubFormat_t(1);
}
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_INPLACE: cufftXtSubFormat_t = cufftXtSubFormat_t(2);
}
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_INPLACE_SHUFFLED: cufftXtSubFormat_t = cufftXtSubFormat_t(
3,
);
}
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED: cufftXtSubFormat_t = cufftXtSubFormat_t(
4,
);
}
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_DISTRIBUTED_INPUT: cufftXtSubFormat_t = cufftXtSubFormat_t(
5,
);
}
impl cufftXtSubFormat_t {
pub const CUFFT_XT_FORMAT_DISTRIBUTED_OUTPUT: cufftXtSubFormat_t = cufftXtSubFormat_t(
6,
);
}
impl cufftXtSubFormat_t {
pub const CUFFT_FORMAT_UNDEFINED: cufftXtSubFormat_t = cufftXtSubFormat_t(7);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftXtSubFormat_t(pub ::core::ffi::c_uint);
pub use self::cufftXtSubFormat_t as cufftXtSubFormat;
impl cufftXtCopyType_t {
pub const CUFFT_COPY_HOST_TO_DEVICE: cufftXtCopyType_t = cufftXtCopyType_t(0);
}
impl cufftXtCopyType_t {
pub const CUFFT_COPY_DEVICE_TO_HOST: cufftXtCopyType_t = cufftXtCopyType_t(1);
}
impl cufftXtCopyType_t {
pub const CUFFT_COPY_DEVICE_TO_DEVICE: cufftXtCopyType_t = cufftXtCopyType_t(2);
}
impl cufftXtCopyType_t {
pub const CUFFT_COPY_UNDEFINED: cufftXtCopyType_t = cufftXtCopyType_t(3);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftXtCopyType_t(pub ::core::ffi::c_uint);
pub use self::cufftXtCopyType_t as cufftXtCopyType;
impl cufftXtQueryType_t {
pub const CUFFT_QUERY_1D_FACTORS: cufftXtQueryType_t = cufftXtQueryType_t(0);
}
impl cufftXtQueryType_t {
pub const CUFFT_QUERY_UNDEFINED: cufftXtQueryType_t = cufftXtQueryType_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftXtQueryType_t(pub ::core::ffi::c_uint);
pub use self::cufftXtQueryType_t as cufftXtQueryType;
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftXt1dFactors_t {
pub size: ::core::ffi::c_longlong,
pub stringCount: ::core::ffi::c_longlong,
pub stringLength: ::core::ffi::c_longlong,
pub substringLength: ::core::ffi::c_longlong,
pub factor1: ::core::ffi::c_longlong,
pub factor2: ::core::ffi::c_longlong,
pub stringMask: ::core::ffi::c_longlong,
pub substringMask: ::core::ffi::c_longlong,
pub factor1Mask: ::core::ffi::c_longlong,
pub factor2Mask: ::core::ffi::c_longlong,
pub stringShift: ::core::ffi::c_int,
pub substringShift: ::core::ffi::c_int,
pub factor1Shift: ::core::ffi::c_int,
pub factor2Shift: ::core::ffi::c_int,
}
pub type cufftXt1dFactors = cufftXt1dFactors_t;
impl cufftXtWorkAreaPolicy_t {
pub const CUFFT_WORKAREA_MINIMAL: cufftXtWorkAreaPolicy_t = cufftXtWorkAreaPolicy_t(
0,
);
}
impl cufftXtWorkAreaPolicy_t {
pub const CUFFT_WORKAREA_USER: cufftXtWorkAreaPolicy_t = cufftXtWorkAreaPolicy_t(1);
}
impl cufftXtWorkAreaPolicy_t {
pub const CUFFT_WORKAREA_PERFORMANCE: cufftXtWorkAreaPolicy_t = cufftXtWorkAreaPolicy_t(
2,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftXtWorkAreaPolicy_t(pub ::core::ffi::c_uint);
pub use self::cufftXtWorkAreaPolicy_t as cufftXtWorkAreaPolicy;
impl cufftXtCallbackType_t {
pub const CUFFT_CB_LD_COMPLEX: cufftXtCallbackType_t = cufftXtCallbackType_t(0);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_LD_COMPLEX_DOUBLE: cufftXtCallbackType_t = cufftXtCallbackType_t(
1,
);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_LD_REAL: cufftXtCallbackType_t = cufftXtCallbackType_t(2);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_LD_REAL_DOUBLE: cufftXtCallbackType_t = cufftXtCallbackType_t(3);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_ST_COMPLEX: cufftXtCallbackType_t = cufftXtCallbackType_t(4);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_ST_COMPLEX_DOUBLE: cufftXtCallbackType_t = cufftXtCallbackType_t(
5,
);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_ST_REAL: cufftXtCallbackType_t = cufftXtCallbackType_t(6);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_ST_REAL_DOUBLE: cufftXtCallbackType_t = cufftXtCallbackType_t(7);
}
impl cufftXtCallbackType_t {
pub const CUFFT_CB_UNDEFINED: cufftXtCallbackType_t = cufftXtCallbackType_t(8);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cufftXtCallbackType_t(pub ::core::ffi::c_uint);
pub use self::cufftXtCallbackType_t as cufftXtCallbackType;
pub type cufftCallbackLoadC = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: usize,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftComplex,
>;
pub type cufftCallbackLoadZ = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: usize,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftDoubleComplex,
>;
pub type cufftCallbackLoadR = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: usize,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftReal,
>;
pub type cufftCallbackLoadD = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: usize,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftDoubleReal,
>;
pub type cufftCallbackStoreC = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: usize,
element: cufftComplex,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftCallbackStoreZ = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: usize,
element: cufftDoubleComplex,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftCallbackStoreR = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: usize,
element: cufftReal,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftCallbackStoreD = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: usize,
element: cufftDoubleReal,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftJITCallbackLoadC = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftComplex,
>;
pub type cufftJITCallbackLoadZ = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftDoubleComplex,
>;
pub type cufftJITCallbackLoadR = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftReal,
>;
pub type cufftJITCallbackLoadD = ::core::option::Option<
unsafe extern "C" fn(
dataIn: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
) -> cufftDoubleReal,
>;
pub type cufftJITCallbackStoreC = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
element: cufftComplex,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftJITCallbackStoreZ = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
element: cufftDoubleComplex,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftJITCallbackStoreR = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
element: cufftReal,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;
pub type cufftJITCallbackStoreD = ::core::option::Option<
unsafe extern "C" fn(
dataOut: *mut ::core::ffi::c_void,
offset: ::core::ffi::c_ulonglong,
element: cufftDoubleReal,
callerInfo: *mut ::core::ffi::c_void,
sharedPointer: *mut ::core::ffi::c_void,
),
>;

550
cuda_types/src/cusparse.rs Normal file
View File

@ -0,0 +1,550 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub type __half = u16;
pub type __nv_bfloat16 = u16;
pub use super::cuda::cuComplex;
pub use super::cuda::cuDoubleComplex;
pub use super::cuda::cudaDataType;
pub use super::cuda::cudaDataType_t;
pub type cudaStream_t = super::cuda::CUstream;
pub use super::cuda::libraryPropertyType;
pub type cudaGraphExecUpdateResultInfo_st = super::cuda::CUgraphExecUpdateResultInfo_st;
pub type cudaAsyncNotificationType = super::cuda::CUasyncNotificationType_enum;
pub type cudaGraph_t = super::cuda::CUgraph;
pub const CUSPARSE_VER_MAJOR: u32 = 12;
pub const CUSPARSE_VER_MINOR: u32 = 5;
pub const CUSPARSE_VER_PATCH: u32 = 8;
pub const CUSPARSE_VER_BUILD: u32 = 93;
pub const CUSPARSE_VERSION: u32 = 12508;
/// Result information returned by cudaGraphExecUpdate
pub type cudaGraphExecUpdateResultInfo = cudaGraphExecUpdateResultInfo_st;
/// Information describing an async notification event
#[repr(C)]
pub struct cudaAsyncNotificationInfo {
pub type_: cudaAsyncNotificationType,
pub info: cudaAsyncNotificationInfo__bindgen_ty_1,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union cudaAsyncNotificationInfo__bindgen_ty_1 {
pub overBudget: cudaAsyncNotificationInfo__bindgen_ty_1__bindgen_ty_1,
}
#[repr(C)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudaAsyncNotificationInfo__bindgen_ty_1__bindgen_ty_1 {
pub bytesOverBudget: ::core::ffi::c_ulonglong,
}
/// Information describing an async notification event
pub type cudaAsyncNotificationInfo_t = cudaAsyncNotificationInfo;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseContext {
_unused: [u8; 0],
}
pub type cusparseHandle_t = *mut cusparseContext;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseMatDescr {
_unused: [u8; 0],
}
pub type cusparseMatDescr_t = *mut cusparseMatDescr;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrsv2Info {
_unused: [u8; 0],
}
pub type bsrsv2Info_t = *mut bsrsv2Info;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrsm2Info {
_unused: [u8; 0],
}
pub type bsrsm2Info_t = *mut bsrsm2Info;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csric02Info {
_unused: [u8; 0],
}
pub type csric02Info_t = *mut csric02Info;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsric02Info {
_unused: [u8; 0],
}
pub type bsric02Info_t = *mut bsric02Info;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csrilu02Info {
_unused: [u8; 0],
}
pub type csrilu02Info_t = *mut csrilu02Info;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct bsrilu02Info {
_unused: [u8; 0],
}
pub type bsrilu02Info_t = *mut bsrilu02Info;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct csru2csrInfo {
_unused: [u8; 0],
}
pub type csru2csrInfo_t = *mut csru2csrInfo;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseColorInfo {
_unused: [u8; 0],
}
pub type cusparseColorInfo_t = *mut cusparseColorInfo;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct pruneInfo {
_unused: [u8; 0],
}
pub type pruneInfo_t = *mut pruneInfo;
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_SUCCESS: cusparseStatus_t = cusparseStatus_t(0);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_NOT_INITIALIZED: cusparseStatus_t = cusparseStatus_t(1);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_ALLOC_FAILED: cusparseStatus_t = cusparseStatus_t(2);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_INVALID_VALUE: cusparseStatus_t = cusparseStatus_t(3);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_ARCH_MISMATCH: cusparseStatus_t = cusparseStatus_t(4);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_MAPPING_ERROR: cusparseStatus_t = cusparseStatus_t(5);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_EXECUTION_FAILED: cusparseStatus_t = cusparseStatus_t(6);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_INTERNAL_ERROR: cusparseStatus_t = cusparseStatus_t(7);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: cusparseStatus_t = cusparseStatus_t(
8,
);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_ZERO_PIVOT: cusparseStatus_t = cusparseStatus_t(9);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_NOT_SUPPORTED: cusparseStatus_t = cusparseStatus_t(10);
}
impl cusparseStatus_t {
pub const CUSPARSE_STATUS_INSUFFICIENT_RESOURCES: cusparseStatus_t = cusparseStatus_t(
11,
);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseStatus_t(pub ::core::ffi::c_uint);
impl cusparsePointerMode_t {
pub const CUSPARSE_POINTER_MODE_HOST: cusparsePointerMode_t = cusparsePointerMode_t(
0,
);
}
impl cusparsePointerMode_t {
pub const CUSPARSE_POINTER_MODE_DEVICE: cusparsePointerMode_t = cusparsePointerMode_t(
1,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparsePointerMode_t(pub ::core::ffi::c_uint);
impl cusparseAction_t {
pub const CUSPARSE_ACTION_SYMBOLIC: cusparseAction_t = cusparseAction_t(0);
}
impl cusparseAction_t {
pub const CUSPARSE_ACTION_NUMERIC: cusparseAction_t = cusparseAction_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseAction_t(pub ::core::ffi::c_uint);
impl cusparseMatrixType_t {
pub const CUSPARSE_MATRIX_TYPE_GENERAL: cusparseMatrixType_t = cusparseMatrixType_t(
0,
);
}
impl cusparseMatrixType_t {
pub const CUSPARSE_MATRIX_TYPE_SYMMETRIC: cusparseMatrixType_t = cusparseMatrixType_t(
1,
);
}
impl cusparseMatrixType_t {
pub const CUSPARSE_MATRIX_TYPE_HERMITIAN: cusparseMatrixType_t = cusparseMatrixType_t(
2,
);
}
impl cusparseMatrixType_t {
pub const CUSPARSE_MATRIX_TYPE_TRIANGULAR: cusparseMatrixType_t = cusparseMatrixType_t(
3,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseMatrixType_t(pub ::core::ffi::c_uint);
impl cusparseFillMode_t {
pub const CUSPARSE_FILL_MODE_LOWER: cusparseFillMode_t = cusparseFillMode_t(0);
}
impl cusparseFillMode_t {
pub const CUSPARSE_FILL_MODE_UPPER: cusparseFillMode_t = cusparseFillMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseFillMode_t(pub ::core::ffi::c_uint);
impl cusparseDiagType_t {
pub const CUSPARSE_DIAG_TYPE_NON_UNIT: cusparseDiagType_t = cusparseDiagType_t(0);
}
impl cusparseDiagType_t {
pub const CUSPARSE_DIAG_TYPE_UNIT: cusparseDiagType_t = cusparseDiagType_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseDiagType_t(pub ::core::ffi::c_uint);
impl cusparseIndexBase_t {
pub const CUSPARSE_INDEX_BASE_ZERO: cusparseIndexBase_t = cusparseIndexBase_t(0);
}
impl cusparseIndexBase_t {
pub const CUSPARSE_INDEX_BASE_ONE: cusparseIndexBase_t = cusparseIndexBase_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseIndexBase_t(pub ::core::ffi::c_uint);
impl cusparseOperation_t {
pub const CUSPARSE_OPERATION_NON_TRANSPOSE: cusparseOperation_t = cusparseOperation_t(
0,
);
}
impl cusparseOperation_t {
pub const CUSPARSE_OPERATION_TRANSPOSE: cusparseOperation_t = cusparseOperation_t(1);
}
impl cusparseOperation_t {
pub const CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE: cusparseOperation_t = cusparseOperation_t(
2,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseOperation_t(pub ::core::ffi::c_uint);
impl cusparseDirection_t {
pub const CUSPARSE_DIRECTION_ROW: cusparseDirection_t = cusparseDirection_t(0);
}
impl cusparseDirection_t {
pub const CUSPARSE_DIRECTION_COLUMN: cusparseDirection_t = cusparseDirection_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseDirection_t(pub ::core::ffi::c_uint);
impl cusparseSolvePolicy_t {
pub const CUSPARSE_SOLVE_POLICY_NO_LEVEL: cusparseSolvePolicy_t = cusparseSolvePolicy_t(
0,
);
}
impl cusparseSolvePolicy_t {
pub const CUSPARSE_SOLVE_POLICY_USE_LEVEL: cusparseSolvePolicy_t = cusparseSolvePolicy_t(
1,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSolvePolicy_t(pub ::core::ffi::c_uint);
impl cusparseColorAlg_t {
pub const CUSPARSE_COLOR_ALG0: cusparseColorAlg_t = cusparseColorAlg_t(0);
}
impl cusparseColorAlg_t {
pub const CUSPARSE_COLOR_ALG1: cusparseColorAlg_t = cusparseColorAlg_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseColorAlg_t(pub ::core::ffi::c_uint);
pub type cusparseLoggerCallback_t = ::core::option::Option<
unsafe extern "C" fn(
logLevel: ::core::ffi::c_int,
functionName: *const ::core::ffi::c_char,
message: *const ::core::ffi::c_char,
),
>;
impl cusparseCsr2CscAlg_t {
pub const CUSPARSE_CSR2CSC_ALG_DEFAULT: cusparseCsr2CscAlg_t = cusparseCsr2CscAlg_t(
1,
);
}
impl cusparseCsr2CscAlg_t {
pub const CUSPARSE_CSR2CSC_ALG1: cusparseCsr2CscAlg_t = cusparseCsr2CscAlg_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseCsr2CscAlg_t(pub ::core::ffi::c_uint);
impl cusparseFormat_t {
///< Compressed Sparse Row (CSR)
pub const CUSPARSE_FORMAT_CSR: cusparseFormat_t = cusparseFormat_t(1);
}
impl cusparseFormat_t {
///< Compressed Sparse Column (CSC)
pub const CUSPARSE_FORMAT_CSC: cusparseFormat_t = cusparseFormat_t(2);
}
impl cusparseFormat_t {
///< Coordinate (COO) - Structure of Arrays
pub const CUSPARSE_FORMAT_COO: cusparseFormat_t = cusparseFormat_t(3);
}
impl cusparseFormat_t {
///< Blocked ELL
pub const CUSPARSE_FORMAT_BLOCKED_ELL: cusparseFormat_t = cusparseFormat_t(5);
}
impl cusparseFormat_t {
///< Blocked Compressed Sparse Row (BSR)
pub const CUSPARSE_FORMAT_BSR: cusparseFormat_t = cusparseFormat_t(6);
}
impl cusparseFormat_t {
///< Sliced ELL
pub const CUSPARSE_FORMAT_SLICED_ELLPACK: cusparseFormat_t = cusparseFormat_t(7);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseFormat_t(pub ::core::ffi::c_uint);
impl cusparseOrder_t {
///< Column-Major Order - Matrix memory layout
pub const CUSPARSE_ORDER_COL: cusparseOrder_t = cusparseOrder_t(1);
}
impl cusparseOrder_t {
///< Row-Major Order - Matrix memory layout
pub const CUSPARSE_ORDER_ROW: cusparseOrder_t = cusparseOrder_t(2);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseOrder_t(pub ::core::ffi::c_uint);
impl cusparseIndexType_t {
/**< 16-bit unsigned integer for matrix/vector
< indices*/
pub const CUSPARSE_INDEX_16U: cusparseIndexType_t = cusparseIndexType_t(1);
}
impl cusparseIndexType_t {
///< 32-bit signed integer for matrix/vector indices
pub const CUSPARSE_INDEX_32I: cusparseIndexType_t = cusparseIndexType_t(2);
}
impl cusparseIndexType_t {
///< 64-bit signed integer for matrix/vector indices
pub const CUSPARSE_INDEX_64I: cusparseIndexType_t = cusparseIndexType_t(3);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseIndexType_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpVecDescr {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseDnVecDescr {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpMatDescr {
_unused: [u8; 0],
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseDnMatDescr {
_unused: [u8; 0],
}
pub type cusparseSpVecDescr_t = *mut cusparseSpVecDescr;
pub type cusparseDnVecDescr_t = *mut cusparseDnVecDescr;
pub type cusparseSpMatDescr_t = *mut cusparseSpMatDescr;
pub type cusparseDnMatDescr_t = *mut cusparseDnMatDescr;
pub type cusparseConstSpVecDescr_t = *const cusparseSpVecDescr;
pub type cusparseConstDnVecDescr_t = *const cusparseDnVecDescr;
pub type cusparseConstSpMatDescr_t = *const cusparseSpMatDescr;
pub type cusparseConstDnMatDescr_t = *const cusparseDnMatDescr;
impl cusparseSpMatAttribute_t {
pub const CUSPARSE_SPMAT_FILL_MODE: cusparseSpMatAttribute_t = cusparseSpMatAttribute_t(
0,
);
}
impl cusparseSpMatAttribute_t {
pub const CUSPARSE_SPMAT_DIAG_TYPE: cusparseSpMatAttribute_t = cusparseSpMatAttribute_t(
1,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpMatAttribute_t(pub ::core::ffi::c_uint);
impl cusparseSparseToDenseAlg_t {
pub const CUSPARSE_SPARSETODENSE_ALG_DEFAULT: cusparseSparseToDenseAlg_t = cusparseSparseToDenseAlg_t(
0,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSparseToDenseAlg_t(pub ::core::ffi::c_uint);
impl cusparseDenseToSparseAlg_t {
pub const CUSPARSE_DENSETOSPARSE_ALG_DEFAULT: cusparseDenseToSparseAlg_t = cusparseDenseToSparseAlg_t(
0,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseDenseToSparseAlg_t(pub ::core::ffi::c_uint);
impl cusparseSpMVAlg_t {
pub const CUSPARSE_SPMV_ALG_DEFAULT: cusparseSpMVAlg_t = cusparseSpMVAlg_t(0);
}
impl cusparseSpMVAlg_t {
pub const CUSPARSE_SPMV_CSR_ALG1: cusparseSpMVAlg_t = cusparseSpMVAlg_t(2);
}
impl cusparseSpMVAlg_t {
pub const CUSPARSE_SPMV_CSR_ALG2: cusparseSpMVAlg_t = cusparseSpMVAlg_t(3);
}
impl cusparseSpMVAlg_t {
pub const CUSPARSE_SPMV_COO_ALG1: cusparseSpMVAlg_t = cusparseSpMVAlg_t(1);
}
impl cusparseSpMVAlg_t {
pub const CUSPARSE_SPMV_COO_ALG2: cusparseSpMVAlg_t = cusparseSpMVAlg_t(4);
}
impl cusparseSpMVAlg_t {
pub const CUSPARSE_SPMV_SELL_ALG1: cusparseSpMVAlg_t = cusparseSpMVAlg_t(5);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpMVAlg_t(pub ::core::ffi::c_uint);
impl cusparseSpSVAlg_t {
pub const CUSPARSE_SPSV_ALG_DEFAULT: cusparseSpSVAlg_t = cusparseSpSVAlg_t(0);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpSVAlg_t(pub ::core::ffi::c_uint);
impl cusparseSpSVUpdate_t {
pub const CUSPARSE_SPSV_UPDATE_GENERAL: cusparseSpSVUpdate_t = cusparseSpSVUpdate_t(
0,
);
}
impl cusparseSpSVUpdate_t {
pub const CUSPARSE_SPSV_UPDATE_DIAGONAL: cusparseSpSVUpdate_t = cusparseSpSVUpdate_t(
1,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpSVUpdate_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpSVDescr {
_unused: [u8; 0],
}
pub type cusparseSpSVDescr_t = *mut cusparseSpSVDescr;
impl cusparseSpSMAlg_t {
pub const CUSPARSE_SPSM_ALG_DEFAULT: cusparseSpSMAlg_t = cusparseSpSMAlg_t(0);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpSMAlg_t(pub ::core::ffi::c_uint);
impl cusparseSpSMUpdate_t {
pub const CUSPARSE_SPSM_UPDATE_GENERAL: cusparseSpSMUpdate_t = cusparseSpSMUpdate_t(
0,
);
}
impl cusparseSpSMUpdate_t {
pub const CUSPARSE_SPSM_UPDATE_DIAGONAL: cusparseSpSMUpdate_t = cusparseSpSMUpdate_t(
1,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpSMUpdate_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpSMDescr {
_unused: [u8; 0],
}
pub type cusparseSpSMDescr_t = *mut cusparseSpSMDescr;
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_ALG_DEFAULT: cusparseSpMMAlg_t = cusparseSpMMAlg_t(0);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_COO_ALG1: cusparseSpMMAlg_t = cusparseSpMMAlg_t(1);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_COO_ALG2: cusparseSpMMAlg_t = cusparseSpMMAlg_t(2);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_COO_ALG3: cusparseSpMMAlg_t = cusparseSpMMAlg_t(3);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_COO_ALG4: cusparseSpMMAlg_t = cusparseSpMMAlg_t(5);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_CSR_ALG1: cusparseSpMMAlg_t = cusparseSpMMAlg_t(4);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_CSR_ALG2: cusparseSpMMAlg_t = cusparseSpMMAlg_t(6);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_CSR_ALG3: cusparseSpMMAlg_t = cusparseSpMMAlg_t(12);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_BLOCKED_ELL_ALG1: cusparseSpMMAlg_t = cusparseSpMMAlg_t(13);
}
impl cusparseSpMMAlg_t {
pub const CUSPARSE_SPMM_BSR_ALG1: cusparseSpMMAlg_t = cusparseSpMMAlg_t(14);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpMMAlg_t(pub ::core::ffi::c_uint);
impl cusparseSpGEMMAlg_t {
pub const CUSPARSE_SPGEMM_DEFAULT: cusparseSpGEMMAlg_t = cusparseSpGEMMAlg_t(0);
}
impl cusparseSpGEMMAlg_t {
pub const CUSPARSE_SPGEMM_CSR_ALG_DETERMINITIC: cusparseSpGEMMAlg_t = cusparseSpGEMMAlg_t(
1,
);
}
impl cusparseSpGEMMAlg_t {
pub const CUSPARSE_SPGEMM_CSR_ALG_NONDETERMINITIC: cusparseSpGEMMAlg_t = cusparseSpGEMMAlg_t(
2,
);
}
impl cusparseSpGEMMAlg_t {
pub const CUSPARSE_SPGEMM_ALG1: cusparseSpGEMMAlg_t = cusparseSpGEMMAlg_t(3);
}
impl cusparseSpGEMMAlg_t {
pub const CUSPARSE_SPGEMM_ALG2: cusparseSpGEMMAlg_t = cusparseSpGEMMAlg_t(4);
}
impl cusparseSpGEMMAlg_t {
pub const CUSPARSE_SPGEMM_ALG3: cusparseSpGEMMAlg_t = cusparseSpGEMMAlg_t(5);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpGEMMAlg_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpGEMMDescr {
_unused: [u8; 0],
}
pub type cusparseSpGEMMDescr_t = *mut cusparseSpGEMMDescr;
impl cusparseSDDMMAlg_t {
pub const CUSPARSE_SDDMM_ALG_DEFAULT: cusparseSDDMMAlg_t = cusparseSDDMMAlg_t(0);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSDDMMAlg_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cusparseSpMMOpPlan {
_unused: [u8; 0],
}
pub type cusparseSpMMOpPlan_t = *mut cusparseSpMMOpPlan;
impl cusparseSpMMOpAlg_t {
pub const CUSPARSE_SPMM_OP_ALG_DEFAULT: cusparseSpMMOpAlg_t = cusparseSpMMOpAlg_t(0);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cusparseSpMMOpAlg_t(pub ::core::ffi::c_uint);

View File

@ -1,2 +1,9 @@
pub mod cublas;
pub mod cublaslt;
pub mod cuda; pub mod cuda;
pub mod cudnn;
pub mod cudnn8;
pub mod cudnn9;
pub mod cufft;
pub mod cusparse;
pub mod nvml; pub mod nvml;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -284,7 +284,7 @@ fn immediate_value<'a, 'input>(stream: &mut PtxParser<'a, 'input>) -> PResult<as
.parse_next(stream) .parse_next(stream)
} }
pub fn parse_for_errors<'input>(text: &'input str) -> Vec<PtxError> { pub fn parse_for_errors<'input>(text: &'input str) -> Vec<PtxError<'input>> {
let (tokens, mut errors) = lex_with_span_unchecked(text); let (tokens, mut errors) = lex_with_span_unchecked(text);
let parse_result = { let parse_result = {
let state = PtxParserState::new(text, &mut errors); let state = PtxParserState::new(text, &mut errors);
@ -307,7 +307,7 @@ pub fn parse_for_errors<'input>(text: &'input str) -> Vec<PtxError> {
fn lex_with_span_unchecked<'input>( fn lex_with_span_unchecked<'input>(
text: &'input str, text: &'input str,
) -> (Vec<(Token<'input>, logos::Span)>, Vec<PtxError>) { ) -> (Vec<(Token<'input>, logos::Span)>, Vec<PtxError<'input>>) {
let lexer = Token::lexer(text); let lexer = Token::lexer(text);
let mut result = Vec::new(); let mut result = Vec::new();
let mut errors = Vec::new(); let mut errors = Vec::new();
@ -322,7 +322,7 @@ fn lex_with_span_unchecked<'input>(
pub fn parse_module_checked<'input>( pub fn parse_module_checked<'input>(
text: &'input str, text: &'input str,
) -> Result<ast::Module<'input>, Vec<PtxError>> { ) -> Result<ast::Module<'input>, Vec<PtxError<'input>>> {
let mut lexer = Token::lexer(text); let mut lexer = Token::lexer(text);
let mut errors = Vec::new(); let mut errors = Vec::new();
let mut tokens = Vec::new(); let mut tokens = Vec::new();
@ -1194,7 +1194,7 @@ impl<Ident> ast::ParsedOperand<Ident> {
) -> PResult<ast::ParsedOperand<&'input str>> { ) -> PResult<ast::ParsedOperand<&'input str>> {
use winnow::combinator::*; use winnow::combinator::*;
use winnow::token::any; use winnow::token::any;
fn vector_index<'input>(inp: &'input str) -> Result<u8, PtxError> { fn vector_index<'input>(inp: &'input str) -> Result<u8, PtxError<'input>> {
match inp { match inp {
".x" | ".r" => Ok(0), ".x" | ".r" => Ok(0),
".y" | ".g" => Ok(1), ".y" | ".g" => Ok(1),

View File

@ -5,3 +5,5 @@
#include <cudaEGL.h> #include <cudaEGL.h>
#include <vdpau/vdpau.h> #include <vdpau/vdpau.h>
#include <cudaVDPAU.h> #include <cudaVDPAU.h>
#include <library_types.h>
#include <cuComplex.h>

View File

@ -0,0 +1 @@
#include <cudnn_adv_infer_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_adv_train_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_backend_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_cnn_infer_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_cnn_train_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_ops_infer_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_ops_train_v8.h>

View File

@ -0,0 +1 @@
#include <cudnn_version_v8.h>

View File

@ -0,0 +1,2 @@
#include <cufftXt.h>
#include <cudalibxt.h>

View File

@ -1,11 +1,13 @@
use proc_macro2::Span; use proc_macro2::Span;
use quote::{format_ident, quote, ToTokens}; use quote::{format_ident, quote, ToTokens};
use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hash::{FxHashMap, FxHashSet};
use std::{collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr}; use std::{
borrow::Cow, collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr,
};
use syn::{ use syn::{
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg, parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path, ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path,
PathArguments, Signature, Type, TypePath, UseTree, PathSegment PathArguments, PathSegment, Signature, Type, TypePath, UseTree,
}; };
fn main() { fn main() {
@ -14,23 +16,511 @@ fn main() {
&crate_root, &crate_root,
&["..", "ext", "hip_runtime-sys", "src", "lib.rs"], &["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
); );
generate_ml(&crate_root);
generate_cuda(&crate_root); generate_cuda(&crate_root);
generate_ml(&crate_root);
generate_cublas(&crate_root);
generate_cublaslt(&crate_root);
generate_cudnn(&crate_root);
generate_cufft(&crate_root);
generate_cusparse(&crate_root);
}
fn generate_cufft(crate_root: &PathBuf) {
let cufft_header = new_builder()
.header_contents("cufft_wraper.h", include_str!("../build/cufft_wraper.h"))
.header("/usr/local/cuda/include/cufftXt.h")
.allowlist_type("^cufft.*")
.allowlist_type("^cudaLibXtDesc.*")
.allowlist_type("^cudaXtDesc.*")
.allowlist_type("^libFormat.*")
.allowlist_function("^cufft.*")
.allowlist_var("^CUFFT_.*")
.must_use_type("cufftResult_t")
.allowlist_recursively(false)
.clang_args(["-I/usr/local/cuda/include"])
.generate()
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cufft_header).unwrap();
generate_functions(
&crate_root,
"cufft",
&["..", "cuda_base", "src", "cufft.rs"],
&module,
);
generate_types_library(
&crate_root,
&["..", "cuda_types", "src", "cufft.rs"],
&module,
)
}
fn generate_cusparse(crate_root: &PathBuf) {
let cufft_header = new_builder()
.header("/usr/local/cuda/include/cusparse_v2.h")
.allowlist_type("^cusparse.*")
.allowlist_type(".*Info_t$")
.allowlist_type(".*Info$")
.allowlist_function("^cusparse.*")
.allowlist_var("^CUSPARSE_.*")
.must_use_type("cusparseStatus_t")
.allowlist_recursively(false)
.clang_args(["-I/usr/local/cuda/include"])
.generate()
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cufft_header).unwrap();
generate_functions(
&crate_root,
"cusparse",
&["..", "cuda_base", "src", "cusparse.rs"],
&module,
);
generate_types_library(
&crate_root,
&["..", "cuda_types", "src", "cusparse.rs"],
&module,
)
}
fn generate_cudnn(crate_root: &PathBuf) {
let cudnn9 = new_builder()
.header("/usr/include/x86_64-linux-gnu/cudnn_v9.h")
.allowlist_type("^cudnn.*")
.allowlist_function("^cudnn.*")
.allowlist_var("^CUDNN_.*")
.must_use_type("cudnnStatus_t")
.allowlist_recursively(false)
.clang_args(["-I/usr/local/cuda/include"])
.generate()
.unwrap()
.to_string();
let cudnn9_module: syn::File = syn::parse_str(&cudnn9).unwrap();
let cudnn9_types = generate_types_library_impl(&cudnn9_module);
let mut current_dir = PathBuf::from(file!());
current_dir.pop();
let cudnn8 = new_builder()
.header("/usr/include/x86_64-linux-gnu/cudnn_v8.h")
.allowlist_type("^cudnn.*")
.allowlist_function("^cudnn.*")
.allowlist_var("^CUDNN_.*")
.must_use_type("cudnnStatus_t")
.allowlist_recursively(false)
.clang_args([
"-I/usr/local/cuda/include",
&format!("-I{}/../build/cudnn_v8", current_dir.display()),
])
.generate()
.unwrap()
.to_string();
let cudnn8_module: syn::File = syn::parse_str(&cudnn8).unwrap();
let cudnn8_types = generate_types_library_impl(&cudnn8_module);
merge_types(
&crate_root,
&["..", "cuda_types", "src", "cudnn.rs"],
cudnn9_types,
&["..", "cuda_types", "src", "cudnn9.rs"],
cudnn8_types,
&["..", "cuda_types", "src", "cudnn8.rs"],
);
generate_functions(
&crate_root,
"cudnn8",
&["..", "cuda_base", "src", "cudnn8.rs"],
&cudnn8_module,
);
generate_functions(
&crate_root,
"cudnn9",
&["..", "cuda_base", "src", "cudnn9.rs"],
&cudnn9_module,
);
}
// This code splits types (and constants) into one of:
// - cudnn8-specific
// - cudnn9-specific
// - cudnn shared
// With the rules being:
// - constants go to the version-specific files
// - if there's conflict between types they go to version-specific files
// - if the cudnn9 type is purely additive over cudnn8 then it goes into the
// shared (and is re-exported by both)
fn merge_types(
output: &PathBuf,
cudnn_path: &[&str],
cudnn9_types: syn::File,
cudnn9_path: &[&str],
cudnn8_types: syn::File,
cudnn8_path: &[&str],
) {
let cudnn_enums = merge_enums(&cudnn9_types, &cudnn8_types);
let conflicting_types = get_conflicting_structs(&cudnn9_types, &cudnn8_types, cudnn_enums);
write_common_cudnn_types(output, cudnn_path, &cudnn9_types, &conflicting_types);
write_cudnn8_types(output, cudnn8_path, &cudnn8_types, &conflicting_types);
write_cudnn9_types(output, cudnn9_path, &cudnn9_types, &conflicting_types);
}
fn write_cudnn9_types(
output: &PathBuf,
cudnn9_path: &[&str],
cudnn9_types: &syn::File,
conflicting_types: &FxHashMap<&Ident, CudnnEnumMergeResult>,
) {
let items = cudnn9_types.items.iter().filter_map(|item| match item {
Item::Impl(impl_) => match conflicting_types.get(type_to_ident(&*impl_.self_ty)) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Option::<syn::Item>::Some(parse_quote!( #impl_))
}
Some(CudnnEnumMergeResult::Same) => None,
},
Item::Struct(struct_) => match conflicting_types.get(&struct_.ident) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Some(parse_quote!( #struct_))
}
Some(CudnnEnumMergeResult::Same) => {
let type_ = &struct_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
},
Item::Enum(enum_) => match conflicting_types.get(&enum_.ident) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Some(parse_quote!( #enum_))
}
Some(CudnnEnumMergeResult::Same) => {
let type_ = &enum_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
},
Item::ForeignMod(ItemForeignMod { .. }) => None,
Item::Const(const_) => Some(parse_quote!(#const_)),
Item::Union(union_) => match conflicting_types.get(&union_.ident) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Some(parse_quote!( #union_))
}
Some(CudnnEnumMergeResult::Same) => {
let type_ = &union_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
},
Item::Use(use_) => Some(parse_quote!(#use_)),
Item::Type(type_) => Some(parse_quote!(#type_)),
_ => unimplemented!(),
});
let module: syn::File = parse_quote! {
#(#items)*
};
let mut output = output.clone();
output.extend(cudnn9_path);
let text = prettyplease::unparse(&module);
write_rust_to_file(output, &text)
}
fn write_cudnn8_types(
output: &PathBuf,
cudnn8_path: &[&str],
cudnn8_types: &syn::File,
conflicting_types: &FxHashMap<&Ident, CudnnEnumMergeResult>,
) {
let items = cudnn8_types.items.iter().filter_map(|item| match item {
Item::Impl(impl_) => match conflicting_types.get(type_to_ident(&*impl_.self_ty)) {
Some(CudnnEnumMergeResult::Conflict) | None => {
Option::<syn::Item>::Some(parse_quote!( #impl_))
}
Some(CudnnEnumMergeResult::Same) => None,
Some(CudnnEnumMergeResult::Cudnn9) => None,
},
Item::Struct(struct_) => match conflicting_types.get(&struct_.ident) {
Some(CudnnEnumMergeResult::Conflict) | None => Some(parse_quote!( #struct_)),
Some(CudnnEnumMergeResult::Same) => {
let type_ = &struct_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
Some(CudnnEnumMergeResult::Cudnn9) => {
let type_ = &struct_.ident;
Some(parse_quote!( pub use super::cudnn9:: #type_; ))
}
},
Item::Enum(enum_) => match conflicting_types.get(&enum_.ident) {
Some(CudnnEnumMergeResult::Conflict) | None => Some(parse_quote!( #enum_)),
Some(CudnnEnumMergeResult::Same) => {
let type_ = &enum_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
Some(CudnnEnumMergeResult::Cudnn9) => {
let type_ = &enum_.ident;
Some(parse_quote!( pub use super::cudnn9:: #type_; ))
}
},
Item::ForeignMod(ItemForeignMod { .. }) => None,
Item::Const(const_) => Some(parse_quote!(#const_)),
Item::Union(union_) => match conflicting_types.get(&union_.ident) {
Some(CudnnEnumMergeResult::Conflict) | None => Some(parse_quote!( #union_)),
Some(CudnnEnumMergeResult::Same) => {
let type_ = &union_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
Some(CudnnEnumMergeResult::Cudnn9) => {
let type_ = &union_.ident;
Some(parse_quote!( pub use super::cudnn9:: #type_; ))
}
},
Item::Use(use_) => Some(parse_quote!(#use_)),
Item::Type(type_) => Some(parse_quote!(#type_)),
_ => unimplemented!(),
});
let module: syn::File = parse_quote! {
#(#items)*
};
let mut output = output.clone();
output.extend(cudnn8_path);
let text = prettyplease::unparse(&module);
write_rust_to_file(output, &text)
}
fn write_common_cudnn_types(
output: &PathBuf,
cudnn_path: &[&str],
cudnn9_types: &syn::File,
conflicting_types: &FxHashMap<&Ident, CudnnEnumMergeResult>,
) {
let common_items = cudnn9_types.items.iter().filter_map(|item| match item {
Item::Impl(ref impl_) => match conflicting_types.get(type_to_ident(&*impl_.self_ty)) {
Some(CudnnEnumMergeResult::Conflict) => None,
Some(CudnnEnumMergeResult::Same) => {
let item: Item = parse_quote! {
#impl_
};
Some(item)
}
Some(CudnnEnumMergeResult::Cudnn9) => None,
None => None,
},
Item::Struct(ref struct_) => match conflicting_types.get(&struct_.ident) {
Some(CudnnEnumMergeResult::Conflict) => None,
Some(CudnnEnumMergeResult::Same) => {
let item: Item = parse_quote! {
#struct_
};
Some(item)
}
Some(CudnnEnumMergeResult::Cudnn9) => None,
None => None,
},
Item::Enum(ref enum_) => match conflicting_types.get(&enum_.ident) {
Some(CudnnEnumMergeResult::Conflict) => None,
Some(CudnnEnumMergeResult::Same) => {
let item: Item = parse_quote! {
#enum_
};
Some(item)
}
Some(CudnnEnumMergeResult::Cudnn9) => None,
None => None,
},
Item::ForeignMod(ItemForeignMod { .. }) => None,
_ => None,
//_ => unimplemented!(),
});
let cudnn_common: syn::File = parse_quote! {
#(#common_items)*
};
let mut output = output.clone();
output.extend(cudnn_path);
let text = prettyplease::unparse(&cudnn_common);
write_rust_to_file(output, &text)
}
fn get_conflicting_structs<'a>(
cudnn9_types: &'a syn::File,
cudnn8_types: &'a syn::File,
mut enums: FxHashMap<&'a Ident, CudnnEnumMergeResult>,
) -> FxHashMap<&'a Ident, CudnnEnumMergeResult> {
let structs9 = get_structs(cudnn9_types);
let structs8 = get_structs(cudnn8_types);
for (struct_name8, struct8) in structs8 {
if enums.contains_key(struct_name8) {
continue;
}
match structs9.get(struct_name8) {
Some(struct9) => {
if struct8 != *struct9 {
panic!("{}", struct_name8.to_string());
}
let has_conflicting_field = struct8.iter().any(|field| {
let type_ = type_to_ident(&field.ty);
enums.get(type_) == Some(&CudnnEnumMergeResult::Conflict)
});
let value = if has_conflicting_field {
CudnnEnumMergeResult::Conflict
} else {
CudnnEnumMergeResult::Same
};
assert!(enums.insert(struct_name8, value).is_none());
}
None => {}
}
}
enums
}
fn type_to_ident<'a>(ty: &'a syn::Type) -> &'a syn::Ident {
match ty {
Type::Path(path) => &path.path.segments[0].ident,
Type::Array(array) => type_to_ident(&array.elem),
_ => unimplemented!("{}", ty.to_token_stream().to_string()),
}
}
fn merge_enums<'a>(
cudnn9_types: &'a syn::File,
cudnn8_types: &'a syn::File,
) -> FxHashMap<&'a Ident, CudnnEnumMergeResult> {
let result = {
let enums8 = get_enums(cudnn8_types);
let enums9 = get_enums(cudnn9_types);
enums8
.iter()
.map(|(enum8_ident, enum8_vars)| {
let merge_result = match enums9.get(enum8_ident) {
Some(enum9_vars) => {
let e8_has_extra = enum8_vars.difference(&enum9_vars).any(|_| true);
let e9_has_extra = enum9_vars.difference(&enum8_vars).any(|_| true);
match (e8_has_extra, e9_has_extra) {
(false, false) => CudnnEnumMergeResult::Same,
(false, true) => CudnnEnumMergeResult::Cudnn9,
(true, true) => CudnnEnumMergeResult::Conflict,
(true, false) => unimplemented!(),
}
}
None => {
unimplemented!()
}
};
(*enum8_ident, merge_result)
})
.collect::<FxHashMap<_, _>>()
};
result
}
#[derive(Copy, Clone, PartialEq, Eq)]
enum CudnnEnumMergeResult {
// Conflicting definitions
Conflict,
// Identical definitions
Same,
// Enum present in both, but cudnn9 definition is a strict superset
Cudnn9,
}
fn get_enums<'a>(
cudnn_module: &'a syn::File,
) -> FxHashMap<&'a Ident, FxHashSet<&'a syn::ImplItemConst>> {
let mut enums = FxHashMap::default();
for item in cudnn_module.items.iter() {
match item {
Item::Impl(ref impl_) => match &*impl_.self_ty {
Type::Path(path) => {
let constant = match impl_.items[0] {
syn::ImplItem::Const(ref impl_item_const) => impl_item_const,
_ => unimplemented!(),
};
enums
.entry(&path.path.segments[0].ident)
.or_insert(FxHashSet::default())
.insert(constant);
}
_ => unimplemented!(),
},
_ => {}
}
}
enums
}
fn get_structs<'a>(cudnn_module: &'a syn::File) -> FxHashMap<&'a Ident, Cow<'a, syn::Fields>> {
let mut structs = FxHashMap::default();
for item in cudnn_module.items.iter() {
match item {
Item::Struct(ref struct_) => {
assert!(structs
.insert(&struct_.ident, Cow::Borrowed(&struct_.fields))
.is_none());
}
Item::Union(ref union_) => {
assert!(structs
.insert(
&union_.ident,
Cow::Owned(syn::Fields::Named(union_.fields.clone()))
)
.is_none());
}
_ => {}
}
}
structs
}
fn generate_cublas(crate_root: &PathBuf) {
let cublas_header = new_builder()
.header("/usr/local/cuda/include/cublas_v2.h")
.allowlist_type("^cublas.*")
.allowlist_function("^cublas.*")
.allowlist_var("^CUBLAS_.*")
.must_use_type("cublasStatus_t")
.allowlist_recursively(false)
.clang_args(["-I/usr/local/cuda/include", "-x", "c++"])
.generate()
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cublas_header).unwrap();
generate_functions(
&crate_root,
"cublas",
&["..", "cuda_base", "src", "cublas.rs"],
&module,
);
generate_types_library(
&crate_root,
&["..", "cuda_types", "src", "cublas.rs"],
&module,
)
}
fn generate_cublaslt(crate_root: &PathBuf) {
let cublas_header = new_builder()
.header("/usr/local/cuda/include/cublasLt.h")
.allowlist_type("^cublas.*")
.allowlist_function("^cublasLt.*")
.allowlist_var("^CUBLASLT_.*")
.must_use_type("cublasStatus_t")
.allowlist_recursively(false)
.clang_args(["-I/usr/local/cuda/include", "-x", "c++"])
.generate()
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cublas_header).unwrap();
generate_functions(
&crate_root,
"cublaslt",
&["..", "cuda_base", "src", "cublaslt.rs"],
&module,
);
generate_types_library(
&crate_root,
&["..", "cuda_types", "src", "cublaslt.rs"],
&module,
)
} }
fn generate_cuda(crate_root: &PathBuf) { fn generate_cuda(crate_root: &PathBuf) {
let cuda_header = bindgen::Builder::default() let cuda_header = new_builder()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
.header_contents("cuda_wrapper.h", include_str!("../build/cuda_wrapper.h")) .header_contents("cuda_wrapper.h", include_str!("../build/cuda_wrapper.h"))
.allowlist_type("^CU.*") .allowlist_type("^CU.*")
.allowlist_type("^cuda.*")
.allowlist_type("^cu.*Complex.*")
.allowlist_type("^libraryPropertyType.*")
.allowlist_function("^cu.*") .allowlist_function("^cu.*")
.allowlist_var("^CU.*") .allowlist_var("^CU.*")
.must_use_type("cudaError_enum") .must_use_type("cudaError_enum")
@ -67,22 +557,14 @@ fn generate_cuda(crate_root: &PathBuf) {
} }
fn generate_ml(crate_root: &PathBuf) { fn generate_ml(crate_root: &PathBuf) {
let ml_header = bindgen::Builder::default() let ml_header = new_builder()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
.header("/usr/local/cuda/include/nvml.h") .header("/usr/local/cuda/include/nvml.h")
.allowlist_type("^nvml.*") .allowlist_type("^nvml.*")
.allowlist_function("^nvml.*") .allowlist_function("^nvml.*")
.allowlist_var("^NVML.*") .allowlist_var("^NVML.*")
.must_use_type("nvmlReturn_t") .must_use_type("nvmlReturn_t")
.constified_enum("nvmlReturn_enum") .constified_enum("nvmlReturn_enum")
.clang_args(["-I/usr/local/cuda/include"])
.generate() .generate()
.unwrap() .unwrap()
.to_string(); .to_string();
@ -112,37 +594,51 @@ fn generate_ml(crate_root: &PathBuf) {
&["..", "cuda_base", "src", "nvml.rs"], &["..", "cuda_base", "src", "nvml.rs"],
&module, &module,
); );
generate_types( generate_types_library(
&crate_root, &crate_root,
&["..", "cuda_types", "src", "nvml.rs"], &["..", "cuda_types", "src", "nvml.rs"],
&module, &module,
); );
} }
fn generate_types(crate_root: &PathBuf, path: &[&str], module: &syn::File) { fn generate_types_library(crate_root: &PathBuf, path: &[&str], module: &syn::File) {
let module = generate_types_library_impl(module);
let mut output = crate_root.clone();
output.extend(path);
let text = prettyplease::unparse(&module)
.replace("self::cudaDataType", "super::cuda::cudaDataType")
// complex as used by cuFFT
.replace(" cuComplex", " super::cuda::cuComplex")
.replace(" cuDoubleComplex", " super::cuda::cuDoubleComplex");
write_rust_to_file(output, &text)
}
fn generate_types_library_impl(module: &syn::File) -> syn::File {
let known_reexports: Punctuated<syn::Item, syn::parse::Nothing> = parse_quote! {
pub type __half = u16;
pub type __nv_bfloat16 = u16;
pub use super::cuda::cuComplex;
pub use super::cuda::cuDoubleComplex;
pub use super::cuda::cudaDataType;
pub use super::cuda::cudaDataType_t;
pub type cudaStream_t = super::cuda::CUstream;
pub use super::cuda::libraryPropertyType;
pub type cudaGraphExecUpdateResultInfo_st = super::cuda::CUgraphExecUpdateResultInfo_st;
pub type cudaAsyncNotificationType = super::cuda::CUasyncNotificationType_enum;
pub type cudaGraph_t = super::cuda::CUgraph;
};
let non_fn = module.items.iter().filter_map(|item| match item { let non_fn = module.items.iter().filter_map(|item| match item {
Item::ForeignMod(_) => None, Item::ForeignMod(_) => None,
_ => Some(item), _ => Some(item),
}); });
let module: syn::File = parse_quote! { let items = known_reexports.iter().chain(non_fn);
#(#non_fn)* parse_quote! {
}; #(#items)*
let mut output = crate_root.clone(); }
output.extend(path);
write_rust_to_file(output, &prettyplease::unparse(&module))
} }
fn generate_hip_runtime(output: &PathBuf, path: &[&str]) { fn generate_hip_runtime(output: &PathBuf, path: &[&str]) {
let hiprt_header = bindgen::Builder::default() let hiprt_header = new_builder()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
.header("/opt/rocm/include/hip/hip_runtime_api.h") .header("/opt/rocm/include/hip/hip_runtime_api.h")
.allowlist_type("^hip.*") .allowlist_type("^hip.*")
.allowlist_function("^hip.*") .allowlist_function("^hip.*")
@ -403,7 +899,7 @@ impl VisitMut for PrependCudaPath {
fn visit_type_path_mut(&mut self, type_: &mut TypePath) { fn visit_type_path_mut(&mut self, type_: &mut TypePath) {
if type_.path.segments.len() == 1 { if type_.path.segments.len() == 1 {
match &*type_.path.segments[0].ident.to_string() { match &*type_.path.segments[0].ident.to_string() {
"usize" | "f64" | "f32" => {} "usize" | "u32" | "i32" | "u64" | "i64" | "f64" | "f32" | "FILE" => {}
_ => { _ => {
let module = &self.module; let module = &self.module;
*type_ = parse_quote! { cuda_types :: #module :: #type_ }; *type_ = parse_quote! { cuda_types :: #module :: #type_ };
@ -426,7 +922,7 @@ struct ExplicitReturnType;
impl VisitMut for ExplicitReturnType { impl VisitMut for ExplicitReturnType {
fn visit_return_type_mut(&mut self, i: &mut syn::ReturnType) { fn visit_return_type_mut(&mut self, i: &mut syn::ReturnType) {
if let syn::ReturnType::Default = i { if let syn::ReturnType::Default = i {
*i = parse_quote! { -> {} }; *i = parse_quote! { -> () };
} }
} }
} }
@ -459,6 +955,7 @@ fn generate_display(
"CUdevResource_st", "CUdevResource_st",
"CUlaunchAttribute_st", "CUlaunchAttribute_st",
"CUlaunchConfig_st", "CUlaunchConfig_st",
"CUmemcpy3DOperand_st",
]; ];
let ignore_functions = [ let ignore_functions = [
"cuGLGetDevices", "cuGLGetDevices",
@ -798,3 +1295,16 @@ fn curesult_display_trait(derive_state: &DeriveDisplayState) -> syn::Item {
} }
} }
} }
fn new_builder() -> bindgen::Builder {
bindgen::Builder::default()
.use_core()
.rust_target(bindgen::RustTarget::Stable_1_77)
.layout_tests(false)
.default_enum_style(bindgen::EnumVariation::NewType {
is_bitfield: false,
is_global: false,
})
.derive_hash(true)
.derive_eq(true)
}

17
zluda_blas/Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "zluda_blas"
version = "0.0.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
name = "cublas"
[dependencies]
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
[package.metadata.zluda]
linux_symlinks = [
"libcublas.so.12",
]

32
zluda_blas/src/impl.rs Normal file
View File

@ -0,0 +1,32 @@
use cuda_types::cublas::cublasStatus_t;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> cublasStatus_t {
unimplemented!()
}
#[cfg(not(debug_assertions))]
pub(crate) fn unimplemented() -> cublasStatus_t {
cublasStatus_t::CUBLAS_STATUS_NOT_SUPPORTED
}
#[allow(non_snake_case)]
pub fn cublasGetStatusName(_status: cuda_types::cublas::cublasStatus_t) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub fn cublasGetStatusString(_status: cuda_types::cublas::cublasStatus_t) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub fn cublasXerbla(_srName: *const ::core::ffi::c_char, _info: ::core::ffi::c_int) -> () {
todo!()
}
#[allow(non_snake_case)]
pub fn cublasGetCudartVersion() -> usize {
todo!()
}

37
zluda_blas/src/lib.rs Normal file
View File

@ -0,0 +1,37 @@
mod r#impl;
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::unimplemented()
}
)*
};
}
macro_rules! implemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::$fn_name( $( $arg_id ),* )
}
)*
};
}
cuda_base::cublas_function_declarations!(
unimplemented,
implemented <= [
cublasGetStatusName,
cublasGetStatusString,
cublasXerbla,
cublasGetCudartVersion
]
);

18
zluda_blaslt/Cargo.toml Normal file
View File

@ -0,0 +1,18 @@
[package]
name = "zluda_blaslt"
version = "0.0.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
name = "cublaslt"
[dependencies]
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
[package.metadata.zluda]
linux_symlinks = [
"libcublasLt.so",
"libcublasLt.so.12",
]

42
zluda_blaslt/src/impl.rs Normal file
View File

@ -0,0 +1,42 @@
use cuda_types::cublaslt::cublasStatus_t;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> cublasStatus_t {
unimplemented!()
}
#[cfg(not(debug_assertions))]
pub(crate) fn unimplemented() -> cublasStatus_t {
cublasStatus_t::CUBLAS_STATUS_NOT_SUPPORTED
}
#[allow(non_snake_case)]
pub(crate) fn cublasLtGetStatusName(
_status: cuda_types::cublaslt::cublasStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cublasLtGetStatusString(
_status: cuda_types::cublaslt::cublasStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cublasLtGetVersion() -> usize {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cublasLtGetCudartVersion() -> usize {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cublasLtDisableCpuInstructionsSetMask(
_mask: ::core::ffi::c_uint,
) -> ::core::ffi::c_uint {
todo!()
}

40
zluda_blaslt/src/lib.rs Normal file
View File

@ -0,0 +1,40 @@
mod r#impl;
pub enum FILE { }
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::unimplemented()
}
)*
};
}
macro_rules! implemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::$fn_name( $( $arg_id ),* )
}
)*
};
}
cuda_base::cublaslt_function_declarations!(
unimplemented,
implemented <= [
cublasLtGetStatusName,
cublasLtGetStatusString,
cublasLtDisableCpuInstructionsSetMask,
cublasLtGetVersion,
cublasLtGetCudartVersion
]
);

18
zluda_dnn/Cargo.toml Normal file
View File

@ -0,0 +1,18 @@
[package]
name = "zluda_dnn"
version = "0.0.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
name = "cudnn64_9"
[dependencies]
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
[package.metadata.zluda]
linux_symlinks = [
"libcudnn.so",
"libcudnn.so.9",
]

34
zluda_dnn/src/impl.rs Normal file
View File

@ -0,0 +1,34 @@
use cuda_types::cudnn9::cudnnStatus_t;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> cudnnStatus_t {
unimplemented!()
}
#[cfg(not(debug_assertions))]
pub(crate) fn unimplemented() -> cudnnStatus_t {
cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED
}
#[allow(non_snake_case)]
pub(crate) fn cudnnGetVersion() -> usize {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cudnnGetMaxDeviceVersion() -> usize {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cudnnGetCudartVersion() -> usize {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cudnnGetErrorString(
_status: cuda_types::cudnn9::cudnnStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cudnnGetLastErrorString(_message: *mut ::core::ffi::c_char, _max_size: usize) -> () {
todo!()
}

38
zluda_dnn/src/lib.rs Normal file
View File

@ -0,0 +1,38 @@
mod r#impl;
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::unimplemented()
}
)*
};
}
macro_rules! implemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::$fn_name( $( $arg_id ),* )
}
)*
};
}
cuda_base::cudnn9_function_declarations!(
unimplemented,
implemented <= [
cudnnGetVersion,
cudnnGetMaxDeviceVersion,
cudnnGetCudartVersion,
cudnnGetErrorString,
cudnnGetLastErrorString
]
);

View File

@ -114,6 +114,17 @@ impl CudaDisplay for f32 {
} }
} }
impl CudaDisplay for f64 {
fn write(
&self,
_fn_name: &'static str,
_index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
write!(writer, "{}", *self)
}
}
pub fn write_handle<T: LowerHex>( pub fn write_handle<T: LowerHex>(
this: &[T; 64], this: &[T; 64],
writer: &mut (impl std::io::Write + ?Sized), writer: &mut (impl std::io::Write + ?Sized),
@ -257,6 +268,69 @@ impl CudaDisplay for CUstreamBatchMemOpParams {
} }
} }
impl CudaDisplay for CUcheckpointRestoreArgs_st {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
CudaDisplay::write(&self.reserved, fn_name, index, writer)
}
}
impl CudaDisplay for CUcheckpointUnlockArgs_st {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
CudaDisplay::write(&self.reserved, fn_name, index, writer)
}
}
impl CudaDisplay for CUcheckpointCheckpointArgs_st {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
CudaDisplay::write(&self.reserved, fn_name, index, writer)
}
}
impl CudaDisplay for CUmemcpy3DOperand_st {
fn write(
&self,
fn_name: &'static str,
index: usize,
writer: &mut (impl std::io::Write + ?Sized),
) -> std::io::Result<()> {
writer.write_all(b"{ type_: ")?;
CudaDisplay::write(&self.type_, "", 0, writer)?;
writer.write_all(b", op: ")?;
match self.type_ {
CUmemcpy3DOperandType::CU_MEMCPY_OPERAND_TYPE_ARRAY => {
CudaDisplay::write(unsafe { &self.op.array }, fn_name, index, writer)?;
}
CUmemcpy3DOperandType::CU_MEMCPY_OPERAND_TYPE_POINTER => {
CudaDisplay::write(unsafe { &self.op.ptr }, fn_name, index, writer)?;
}
_ => {
CudaDisplay::write(
&unsafe { mem::transmute::<_, [u8; 32]>(self.op) },
fn_name,
index,
writer,
)?;
}
}
writer.write_all(b" }")
}
}
pub fn write_wait_value( pub fn write_wait_value(
this: &CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st, this: &CUstreamBatchMemOpParams_union_CUstreamMemOpWaitValueParams_st,
writer: &mut (impl std::io::Write + ?Sized), writer: &mut (impl std::io::Write + ?Sized),
@ -369,7 +443,7 @@ impl CudaDisplay for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
CudaDisplay::write(&self.size, "", 0, writer)?; CudaDisplay::write(&self.size, "", 0, writer)?;
writer.write_all(b", flags: ")?; writer.write_all(b", flags: ")?;
CudaDisplay::write(&self.flags, "", 0, writer)?; CudaDisplay::write(&self.flags, "", 0, writer)?;
return writer.write_all(b", ... }") return writer.write_all(b", ... }");
} }
} }
writer.write_all(b", size: ")?; writer.write_all(b", size: ")?;
@ -441,9 +515,7 @@ impl CudaDisplay for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
} }
} }
impl CudaDisplay impl CudaDisplay for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2
{
fn write( fn write(
&self, &self,
_fn_name: &'static str, _fn_name: &'static str,
@ -456,9 +528,7 @@ impl CudaDisplay
} }
} }
impl CudaDisplay impl CudaDisplay for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2
{
fn write( fn write(
&self, &self,
_fn_name: &'static str, _fn_name: &'static str,
@ -667,15 +737,30 @@ fn write_launch_attribute(
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).clusterSchedulingPolicyPreference }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).clusterSchedulingPolicyPreference },
fn_name,
index,
writer,
)
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).programmaticStreamSerializationAllowed }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).programmaticStreamSerializationAllowed },
fn_name,
index,
writer,
)
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).programmaticEvent }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).programmaticEvent },
fn_name,
index,
writer,
)
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_PRIORITY => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
@ -683,19 +768,39 @@ fn write_launch_attribute(
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).memSyncDomainMap }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).memSyncDomainMap },
fn_name,
index,
writer,
)
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).memSyncDomain }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).memSyncDomain },
fn_name,
index,
writer,
)
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).launchCompletionEvent }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).launchCompletionEvent },
fn_name,
index,
writer,
)
} }
CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => { CUlaunchAttributeID::CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE => {
writer.write_all(b", value_out: ")?; writer.write_all(b", value_out: ")?;
CudaDisplay::write(unsafe { &(*value_out).deviceUpdatableKernelNode }, fn_name, index, writer) CudaDisplay::write(
unsafe { &(*value_out).deviceUpdatableKernelNode },
fn_name,
index,
writer,
)
} }
_ => writer.write_all(b", ... "), _ => writer.write_all(b", ... "),
} }

File diff suppressed because it is too large Load Diff

17
zluda_fft/Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "zluda_fft"
version = "0.0.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
name = "cufft"
[dependencies]
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
[package.metadata.zluda]
linux_symlinks = [
"libcufft.so.11",
]

11
zluda_fft/src/impl.rs Normal file
View File

@ -0,0 +1,11 @@
use cuda_types::cufft::cufftResult_t;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> cufftResult_t {
unimplemented!()
}
#[cfg(not(debug_assertions))]
pub(crate) fn unimplemented() -> cufftResult_t {
cufftResult_t::CUFFT_NOT_SUPPORTED
}

18
zluda_fft/src/lib.rs Normal file
View File

@ -0,0 +1,18 @@
mod r#impl;
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::unimplemented()
}
)*
};
}
cuda_base::cufft_function_declarations!(
unimplemented
);

17
zluda_sparse/Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "zluda_sparse"
version = "0.0.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
name = "cusparse"
[dependencies]
cuda_base = { path = "../cuda_base" }
cuda_types = { path = "../cuda_types" }
[package.metadata.zluda]
linux_symlinks = [
"libcusparse.so.12",
]

53
zluda_sparse/src/impl.rs Normal file
View File

@ -0,0 +1,53 @@
use cuda_types::cusparse::cusparseStatus_t;
#[cfg(debug_assertions)]
pub(crate) fn unimplemented() -> cusparseStatus_t {
unimplemented!()
}
#[cfg(not(debug_assertions))]
pub(crate) fn unimplemented() -> cusparseStatus_t {
cusparseStatus_t::CUSPARSE_STATUS_NOT_SUPPORTED
}
#[allow(non_snake_case)]
pub(crate) fn cusparseGetErrorName(
_status: cuda_types::cusparse::cusparseStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cusparseGetErrorString(
_status: cuda_types::cusparse::cusparseStatus_t,
) -> *const ::core::ffi::c_char {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cusparseGetMatType(
_descrA: cuda_types::cusparse::cusparseMatDescr_t,
) -> cuda_types::cusparse::cusparseMatrixType_t {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cusparseGetMatFillMode(
_descrA: cuda_types::cusparse::cusparseMatDescr_t,
) -> cuda_types::cusparse::cusparseFillMode_t {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cusparseGetMatDiagType(
_descrA: cuda_types::cusparse::cusparseMatDescr_t,
) -> cuda_types::cusparse::cusparseDiagType_t {
todo!()
}
#[allow(non_snake_case)]
pub(crate) fn cusparseGetMatIndexBase(
_descrA: cuda_types::cusparse::cusparseMatDescr_t,
) -> cuda_types::cusparse::cusparseIndexBase_t {
todo!()
}

42
zluda_sparse/src/lib.rs Normal file
View File

@ -0,0 +1,42 @@
mod r#impl;
pub enum FILE { }
macro_rules! unimplemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::unimplemented()
}
)*
};
}
macro_rules! implemented {
($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* ) -> $ret_type:ty;)*) => {
$(
#[cfg_attr(not(test), no_mangle)]
#[allow(improper_ctypes)]
#[allow(improper_ctypes_definitions)]
pub unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
crate::r#impl::$fn_name( $( $arg_id ),* )
}
)*
};
}
cuda_base::cusparse_function_declarations!(
unimplemented,
implemented <= [
cusparseGetErrorName,
cusparseGetErrorString,
cusparseGetMatIndexBase,
cusparseGetMatDiagType,
cusparseGetMatFillMode,
cusparseGetMatType
]
);