diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index efe9d1c..955bf3c 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.4.1-base-ubuntu22.04 +FROM nvidia/cuda:12.8.1-base-ubuntu24.04 RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ wget \ @@ -18,10 +18,14 @@ RUN wget https://apt.llvm.org/llvm.sh && \ ./llvm.sh ${LLVM_VERSION} # Feel free to change to a newer version if you have a newer verison on your host -ARG CUDA_PKG_VERSION=12-4 +ARG CUDA_PKG_VERSION=12-8 # Docker <-> host driver version compatiblity is newer host <-> older docker -# We don't care about a specific driver version, so pick oldest 5XX -ARG CUDA_DRIVER=515 +# We don't care about a specific driver version, so pick the oldest compatible 5XX +ARG CUDA_DRIVER=570 +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb && \ + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \ + dpkg -i libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \ + rm libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ # CUDA headers need it for interop libgl-dev libegl-dev libvdpau-dev \ @@ -30,13 +34,18 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter cuda-nvml-dev-${CUDA_PKG_VERSION} \ cuda-cudart-${CUDA_PKG_VERSION} \ cuda-profiler-api-${CUDA_PKG_VERSION} \ - cuda-nvcc-${CUDA_PKG_VERSION} + cuda-nvcc-${CUDA_PKG_VERSION} \ + libcudnn8-dev \ + cudnn9-cuda-${CUDA_PKG_VERSION} \ + libcufft-dev-${CUDA_PKG_VERSION} \ + libcublas-dev-${CUDA_PKG_VERSION} \ + libcusparse-dev-${CUDA_PKG_VERSION} -ARG 
ROCM_VERSION=6.3.1 +ARG ROCM_VERSION=6.4 RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \ wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \ gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \ - echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} jammy main" > /etc/apt/sources.list.d/rocm.list && \ + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" > /etc/apt/sources.list.d/rocm.list && \ echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \ DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ rocminfo \ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 34e88fb..7c3c934 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,7 +7,7 @@ }, "securityOpt": [ "seccomp=unconfined" ], "runArgs": [ - "--runtime=nvidia", + //"--runtime=nvidia", "--device=/dev/kfd", "--device=/dev/dri", "--group-add=video" @@ -25,7 +25,7 @@ }, // https://aka.ms/dev-containers-non-root. 
"remoteUser": "root", - //"hostRequirements": { "gpu": "optional" } + "hostRequirements": { "gpu": true }, "customizations": { "vscode": { "extensions": [ "mhutchie.git-graph" ] diff --git a/Cargo.lock b/Cargo.lock index 5726bb3..1d10122 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1343,6 +1343,30 @@ dependencies = [ "syn 2.0.89", ] +[[package]] +name = "zluda_blas" +version = "0.0.0" +dependencies = [ + "cuda_base", + "cuda_types", +] + +[[package]] +name = "zluda_blaslt" +version = "0.0.0" +dependencies = [ + "cuda_base", + "cuda_types", +] + +[[package]] +name = "zluda_dnn" +version = "0.0.0" +dependencies = [ + "cuda_base", + "cuda_types", +] + [[package]] name = "zluda_dump" version = "0.0.0" @@ -1364,6 +1388,14 @@ dependencies = [ "winapi", ] +[[package]] +name = "zluda_fft" +version = "0.0.0" +dependencies = [ + "cuda_base", + "cuda_types", +] + [[package]] name = "zluda_inject" version = "0.0.0" @@ -1393,3 +1425,11 @@ dependencies = [ "wchar", "winapi", ] + +[[package]] +name = "zluda_sparse" +version = "0.0.0" +dependencies = [ + "cuda_base", + "cuda_types", +] diff --git a/Cargo.toml b/Cargo.toml index 18fd140..875d36f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,11 @@ members = [ "ptx_parser_macros_impl", "xtask", "zluda_bindgen", + "zluda_dnn", + "zluda_blas", + "zluda_blaslt", + "zluda_fft", + "zluda_sparse", ] default-members = ["zluda", "zluda_ml", "zluda_inject", "zluda_redirect"] diff --git a/cuda_base/src/cublas.rs b/cuda_base/src/cublas.rs new file mode 100644 index 0000000..af6702e --- /dev/null +++ b/cuda_base/src/cublas.rs @@ -0,0 +1,6861 @@ +// Generated automatically by zluda_bindgen +// DO NOT EDIT MANUALLY +#![allow(warnings)] +extern "system" { + #[must_use] + fn cublasCreate_v2( + handle: *mut cuda_types::cublas::cublasHandle_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDestroy_v2( + handle: cuda_types::cublas::cublasHandle_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn 
cublasGetVersion_v2( + handle: cuda_types::cublas::cublasHandle_t, + version: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetProperty( + type_: cuda_types::cublas::libraryPropertyType, + value: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + fn cublasGetCudartVersion() -> usize; + #[must_use] + fn cublasSetWorkspace_v2( + handle: cuda_types::cublas::cublasHandle_t, + workspace: *mut ::core::ffi::c_void, + workspaceSizeInBytes: usize, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetStream_v2( + handle: cuda_types::cublas::cublasHandle_t, + streamId: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetStream_v2( + handle: cuda_types::cublas::cublasHandle_t, + streamId: *mut cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetPointerMode_v2( + handle: cuda_types::cublas::cublasHandle_t, + mode: *mut cuda_types::cublas::cublasPointerMode_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetPointerMode_v2( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasPointerMode_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetAtomicsMode( + handle: cuda_types::cublas::cublasHandle_t, + mode: *mut cuda_types::cublas::cublasAtomicsMode_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetAtomicsMode( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasAtomicsMode_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetMathMode( + handle: cuda_types::cublas::cublasHandle_t, + mode: *mut cuda_types::cublas::cublasMath_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetMathMode( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasMath_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn 
cublasGetSmCountTarget( + handle: cuda_types::cublas::cublasHandle_t, + smCountTarget: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetSmCountTarget( + handle: cuda_types::cublas::cublasHandle_t, + smCountTarget: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + fn cublasGetStatusName( + status: cuda_types::cublas::cublasStatus_t, + ) -> *const ::core::ffi::c_char; + fn cublasGetStatusString( + status: cuda_types::cublas::cublasStatus_t, + ) -> *const ::core::ffi::c_char; + #[must_use] + fn cublasLoggerConfigure( + logIsOn: ::core::ffi::c_int, + logToStdOut: ::core::ffi::c_int, + logToStdErr: ::core::ffi::c_int, + logFileName: *const ::core::ffi::c_char, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetLoggerCallback( + userCallback: cuda_types::cublas::cublasLogCallback, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetLoggerCallback( + userCallback: *mut cuda_types::cublas::cublasLogCallback, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetVector( + n: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + incx: ::core::ffi::c_int, + devicePtr: *mut ::core::ffi::c_void, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetVector_64( + n: i64, + elemSize: i64, + x: *const ::core::ffi::c_void, + incx: i64, + devicePtr: *mut ::core::ffi::c_void, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetVector( + n: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + incx: ::core::ffi::c_int, + y: *mut ::core::ffi::c_void, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetVector_64( + n: i64, + elemSize: i64, + x: *const ::core::ffi::c_void, + incx: i64, + y: *mut ::core::ffi::c_void, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + 
fn cublasSetMatrix( + rows: ::core::ffi::c_int, + cols: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + A: *const ::core::ffi::c_void, + lda: ::core::ffi::c_int, + B: *mut ::core::ffi::c_void, + ldb: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetMatrix_64( + rows: i64, + cols: i64, + elemSize: i64, + A: *const ::core::ffi::c_void, + lda: i64, + B: *mut ::core::ffi::c_void, + ldb: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetMatrix( + rows: ::core::ffi::c_int, + cols: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + A: *const ::core::ffi::c_void, + lda: ::core::ffi::c_int, + B: *mut ::core::ffi::c_void, + ldb: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetMatrix_64( + rows: i64, + cols: i64, + elemSize: i64, + A: *const ::core::ffi::c_void, + lda: i64, + B: *mut ::core::ffi::c_void, + ldb: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetVectorAsync( + n: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + hostPtr: *const ::core::ffi::c_void, + incx: ::core::ffi::c_int, + devicePtr: *mut ::core::ffi::c_void, + incy: ::core::ffi::c_int, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetVectorAsync_64( + n: i64, + elemSize: i64, + hostPtr: *const ::core::ffi::c_void, + incx: i64, + devicePtr: *mut ::core::ffi::c_void, + incy: i64, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetVectorAsync( + n: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + devicePtr: *const ::core::ffi::c_void, + incx: ::core::ffi::c_int, + hostPtr: *mut ::core::ffi::c_void, + incy: ::core::ffi::c_int, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetVectorAsync_64( + n: i64, + elemSize: i64, + devicePtr: *const ::core::ffi::c_void, + incx: 
i64, + hostPtr: *mut ::core::ffi::c_void, + incy: i64, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetMatrixAsync( + rows: ::core::ffi::c_int, + cols: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + A: *const ::core::ffi::c_void, + lda: ::core::ffi::c_int, + B: *mut ::core::ffi::c_void, + ldb: ::core::ffi::c_int, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSetMatrixAsync_64( + rows: i64, + cols: i64, + elemSize: i64, + A: *const ::core::ffi::c_void, + lda: i64, + B: *mut ::core::ffi::c_void, + ldb: i64, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetMatrixAsync( + rows: ::core::ffi::c_int, + cols: ::core::ffi::c_int, + elemSize: ::core::ffi::c_int, + A: *const ::core::ffi::c_void, + lda: ::core::ffi::c_int, + B: *mut ::core::ffi::c_void, + ldb: ::core::ffi::c_int, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGetMatrixAsync_64( + rows: i64, + cols: i64, + elemSize: i64, + A: *const ::core::ffi::c_void, + lda: i64, + B: *mut ::core::ffi::c_void, + ldb: i64, + stream: cuda_types::cublas::cudaStream_t, + ) -> cuda_types::cublas::cublasStatus_t; + fn cublasXerbla(srName: *const ::core::ffi::c_char, info: ::core::ffi::c_int) -> (); + #[must_use] + fn cublasNrm2Ex( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasNrm2Ex_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + result: 
*mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSnrm2_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSnrm2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f32, + incx: i64, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDnrm2_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDnrm2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f64, + incx: i64, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScnrm2_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScnrm2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDznrm2_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDznrm2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDotEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: 
*const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *const ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDotEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *const ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDotcEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *const ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDotcEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *const ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSdot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + y: *const f32, + incy: ::core::ffi::c_int, + result: *mut f32, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSdot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f32, + incx: i64, + y: *const f32, + incy: i64, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDdot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + y: *const f64, + incy: ::core::ffi::c_int, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDdot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f64, + incx: i64, + y: *const f64, + incy: i64, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCdotu_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + result: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCdotu_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + result: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCdotc_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + result: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCdotc_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + result: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + 
#[must_use] + fn cublasZdotu_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + result: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdotu_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + result: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdotc_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + result: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdotc_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + result: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScalEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const ::core::ffi::c_void, + alphaType: cuda_types::cublas::cudaDataType, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + executionType: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScalEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const ::core::ffi::c_void, + alphaType: cuda_types::cublas::cudaDataType, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + executionType: 
cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSscal_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSscal_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const f32, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDscal_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDscal_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const f64, + x: *mut f64, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCscal_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCscal_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsscal_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsscal_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const f32, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZscal_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *mut 
cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZscal_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdscal_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdscal_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const f64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasAxpyEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const ::core::ffi::c_void, + alphaType: cuda_types::cublas::cudaDataType, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasAxpyEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const ::core::ffi::c_void, + alphaType: cuda_types::cublas::cudaDataType, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSaxpy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSaxpy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const f32, + x: *const f32, + incx: i64, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDaxpy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDaxpy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const f64, + x: *const f64, + incx: i64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCaxpy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCaxpy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZaxpy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZaxpy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCopyEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCopyEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScopy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScopy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f32, + incx: i64, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDcopy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDcopy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f64, + incx: i64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCcopy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCcopy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: 
i64, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZcopy_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZcopy_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSswap_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSswap_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut f32, + incx: i64, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDswap_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDswap_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut f64, + incx: i64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCswap_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCswap_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + y: *mut cuda_types::cublas::cuComplex, + 
incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZswap_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZswap_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSwapEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSwapEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIsamax_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIsamax_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f32, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIdamax_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIdamax_v2_64( + handle: 
cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f64, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIcamax_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIcamax_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIzamax_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIzamax_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIamaxEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIamaxEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIsamin_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIsamin_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f32, + incx: i64, + result: *mut i64, + 
) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIdamin_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIdamin_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f64, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIcamin_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIcamin_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIzamin_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIzamin_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIaminEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasIaminEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + result: *mut i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasAsumEx( + handle: 
cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasAsumEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + result: *mut ::core::ffi::c_void, + resultType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSasum_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSasum_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f32, + incx: i64, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDasum_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDasum_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const f64, + incx: i64, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScasum_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasScasum_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + result: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDzasum_v2( + 
handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDzasum_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + result: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSrot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + y: *mut f32, + incy: ::core::ffi::c_int, + c: *const f32, + s: *const f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSrot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut f32, + incx: i64, + y: *mut f32, + incy: i64, + c: *const f32, + s: *const f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDrot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + y: *mut f64, + incy: ::core::ffi::c_int, + c: *const f64, + s: *const f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDrot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut f64, + incx: i64, + y: *mut f64, + incy: i64, + c: *const f64, + s: *const f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCrot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + c: *const f32, + s: *const cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCrot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + c: *const f32, + s: *const 
cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsrot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + c: *const f32, + s: *const f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsrot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + c: *const f32, + s: *const f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZrot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + c: *const f64, + s: *const cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZrot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + c: *const f64, + s: *const cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdrot_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + c: *const f64, + s: *const f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdrot_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + c: *const f64, + s: *const f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + 
fn cublasRotEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + c: *const ::core::ffi::c_void, + s: *const ::core::ffi::c_void, + csType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasRotEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + c: *const ::core::ffi::c_void, + s: *const ::core::ffi::c_void, + csType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSrotg_v2( + handle: cuda_types::cublas::cublasHandle_t, + a: *mut f32, + b: *mut f32, + c: *mut f32, + s: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDrotg_v2( + handle: cuda_types::cublas::cublasHandle_t, + a: *mut f64, + b: *mut f64, + c: *mut f64, + s: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCrotg_v2( + handle: cuda_types::cublas::cublasHandle_t, + a: *mut cuda_types::cublas::cuComplex, + b: *mut cuda_types::cublas::cuComplex, + c: *mut f32, + s: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZrotg_v2( + handle: cuda_types::cublas::cublasHandle_t, + a: *mut cuda_types::cublas::cuDoubleComplex, + b: *mut cuda_types::cublas::cuDoubleComplex, + c: *mut f64, + s: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasRotgEx( + handle: cuda_types::cublas::cublasHandle_t, + a: *mut ::core::ffi::c_void, + b: *mut 
::core::ffi::c_void, + abType: cuda_types::cublas::cudaDataType, + c: *mut ::core::ffi::c_void, + s: *mut ::core::ffi::c_void, + csType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSrotm_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + y: *mut f32, + incy: ::core::ffi::c_int, + param: *const f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSrotm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut f32, + incx: i64, + y: *mut f32, + incy: i64, + param: *const f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDrotm_v2( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + y: *mut f64, + incy: ::core::ffi::c_int, + param: *const f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDrotm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut f64, + incx: i64, + y: *mut f64, + incy: i64, + param: *const f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasRotmEx( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: ::core::ffi::c_int, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: ::core::ffi::c_int, + param: *const ::core::ffi::c_void, + paramType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasRotmEx_64( + handle: cuda_types::cublas::cublasHandle_t, + n: i64, + x: *mut ::core::ffi::c_void, + xType: cuda_types::cublas::cudaDataType, + incx: i64, + y: *mut ::core::ffi::c_void, + yType: cuda_types::cublas::cudaDataType, + incy: i64, + param: *const ::core::ffi::c_void, + 
paramType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSrotmg_v2( + handle: cuda_types::cublas::cublasHandle_t, + d1: *mut f32, + d2: *mut f32, + x1: *mut f32, + y1: *const f32, + param: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDrotmg_v2( + handle: cuda_types::cublas::cublasHandle_t, + d1: *mut f64, + d2: *mut f64, + x1: *mut f64, + y1: *const f64, + param: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasRotmgEx( + handle: cuda_types::cublas::cublasHandle_t, + d1: *mut ::core::ffi::c_void, + d1Type: cuda_types::cublas::cudaDataType, + d2: *mut ::core::ffi::c_void, + d2Type: cuda_types::cublas::cudaDataType, + x1: *mut ::core::ffi::c_void, + x1Type: cuda_types::cublas::cudaDataType, + y1: *const ::core::ffi::c_void, + y1Type: cuda_types::cublas::cudaDataType, + param: *mut ::core::ffi::c_void, + paramType: cuda_types::cublas::cudaDataType, + executiontype: cuda_types::cublas::cudaDataType, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + x: *const f32, + incx: i64, + beta: *const f32, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: 
::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + beta: *const f64, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + x: *const f64, + incx: i64, + beta: *const f64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut 
cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + kl: ::core::ffi::c_int, + ku: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + kl: i64, + ku: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + x: *const f32, + incx: i64, + beta: *const f32, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + kl: ::core::ffi::c_int, + ku: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + beta: *const f64, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + kl: i64, + ku: 
i64, + alpha: *const f64, + A: *const f64, + lda: i64, + x: *const f64, + incx: i64, + beta: *const f64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + kl: ::core::ffi::c_int, + ku: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + kl: i64, + ku: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + kl: ::core::ffi::c_int, + ku: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + kl: i64, + 
ku: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const f32, + lda: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const f32, + lda: i64, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const f64, + lda: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const f64, + lda: i64, + x: *mut f64, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: 
::core::ffi::c_int, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const f32, + lda: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: 
cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const f32, + lda: i64, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const f64, + lda: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const f64, + lda: i64, + x: *mut f64, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: 
cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStpmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const f32, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStpmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const f32, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtpmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const f64, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtpmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const f64, + x: *mut f64, + incx: i64, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtpmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const cuda_types::cublas::cuComplex, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtpmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const cuda_types::cublas::cuComplex, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtpmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const cuda_types::cublas::cuDoubleComplex, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtpmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const cuda_types::cublas::cuDoubleComplex, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const f32, + lda: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + 
#[must_use] + fn cublasStrsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const f32, + lda: i64, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const f64, + lda: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const f64, + lda: i64, + x: *mut f64, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: 
cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStpsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const f32, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStpsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const f32, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtpsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const f64, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtpsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const f64, + x: *mut f64, + incx: 
i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtpsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const cuda_types::cublas::cuComplex, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtpsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const cuda_types::cublas::cuComplex, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtpsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + AP: *const cuda_types::cublas::cuDoubleComplex, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtpsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + AP: *const cuda_types::cublas::cuDoubleComplex, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStbsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const f32, + lda: ::core::ffi::c_int, + x: *mut f32, + incx: ::core::ffi::c_int, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStbsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const f32, + lda: i64, + x: *mut f32, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtbsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const f64, + lda: ::core::ffi::c_int, + x: *mut f64, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtbsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const f64, + lda: i64, + x: *mut f64, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtbsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtbsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *mut cuda_types::cublas::cuComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn 
cublasZtbsv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtbsv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + n: i64, + k: i64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *mut cuda_types::cublas::cuDoubleComplex, + incx: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsymv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsymv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + x: *const f32, + incx: i64, + beta: *const f32, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsymv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + beta: *const f64, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsymv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: 
cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + x: *const f64, + incx: i64, + beta: *const f64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsymv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsymv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsymv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsymv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: 
i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChemv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChemv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhemv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhemv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: 
*mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + x: *const f32, + incx: i64, + beta: *const f32, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + beta: *const f64, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + k: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + x: *const f64, + incx: i64, + beta: *const f64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: 
::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhbmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhbmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSspmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + AP: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSspmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: 
cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + AP: *const f32, + x: *const f32, + incx: i64, + beta: *const f32, + y: *mut f32, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDspmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + AP: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + beta: *const f64, + y: *mut f64, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDspmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + AP: *const f64, + x: *const f64, + incx: i64, + beta: *const f64, + y: *mut f64, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChpmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + AP: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChpmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + AP: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhpmv_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + AP: *const 
cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhpmv_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + AP: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSger_v2( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + y: *const f32, + incy: ::core::ffi::c_int, + A: *mut f32, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSger_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + m: i64, + n: i64, + alpha: *const f32, + x: *const f32, + incx: i64, + y: *const f32, + incy: i64, + A: *mut f32, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDger_v2( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + y: *const f64, + incy: ::core::ffi::c_int, + A: *mut f64, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDger_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + m: i64, + n: i64, + alpha: *const f64, + x: *const f64, + incx: i64, + y: *const f64, + incy: i64, + A: *mut f64, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgeru_v2( + handle: cuda_types::cublas::cublasHandle_t, + m: 
::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgeru_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + A: *mut cuda_types::cublas::cuComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgerc_v2( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgerc_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + A: *mut cuda_types::cublas::cuComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgeru_v2( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgeru_v2_64( + handle: 
cuda_types::cublas::cublasHandle_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgerc_v2( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgerc_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + A: *mut f32, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + x: *const f32, + incx: i64, + A: *mut f32, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + A: *mut f64, + lda: ::core::ffi::c_int, + ) 
-> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + x: *const f64, + incx: i64, + A: *mut f64, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + A: *mut cuda_types::cublas::cuComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCher_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const cuda_types::cublas::cuComplex, + incx: 
::core::ffi::c_int, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCher_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + A: *mut cuda_types::cublas::cuComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZher_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZher_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSspr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + AP: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSspr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + x: *const f32, + incx: i64, + AP: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDspr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + AP: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDspr_v2_64( + handle: 
cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + x: *const f64, + incx: i64, + AP: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChpr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + AP: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChpr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + AP: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhpr_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + AP: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhpr_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + AP: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + y: *const f32, + incy: ::core::ffi::c_int, + A: *mut f32, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + x: 
*const f32, + incx: i64, + y: *const f32, + incy: i64, + A: *mut f32, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + y: *const f64, + incy: ::core::ffi::c_int, + A: *mut f64, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + x: *const f64, + incx: i64, + y: *const f64, + incy: i64, + A: *mut f64, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + A: *mut cuda_types::cublas::cuComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + A: *mut 
cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCher2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCher2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + A: *mut cuda_types::cublas::cuComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZher2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZher2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: 
*const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSspr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f32, + x: *const f32, + incx: ::core::ffi::c_int, + y: *const f32, + incy: ::core::ffi::c_int, + AP: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSspr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f32, + x: *const f32, + incx: i64, + y: *const f32, + incy: i64, + AP: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDspr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const f64, + x: *const f64, + incx: ::core::ffi::c_int, + y: *const f64, + incy: ::core::ffi::c_int, + AP: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDspr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const f64, + x: *const f64, + incx: i64, + y: *const f64, + incy: i64, + AP: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChpr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + AP: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChpr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: 
cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + y: *const cuda_types::cublas::cuComplex, + incy: i64, + AP: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhpr2_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + AP: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhpr2_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + y: *const cuda_types::cublas::cuDoubleComplex, + incy: i64, + AP: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + Aarray: *const *const f32, + lda: ::core::ffi::c_int, + xarray: *const *const f32, + incx: ::core::ffi::c_int, + beta: *const f32, + yarray: *const *mut f32, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + Aarray: *const *const f32, + lda: i64, + xarray: *const *const f32, + incx: i64, + beta: *const f32, + yarray: *const *mut f32, + incy: i64, + batchCount: i64, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + Aarray: *const *const f64, + lda: ::core::ffi::c_int, + xarray: *const *const f64, + incx: ::core::ffi::c_int, + beta: *const f64, + yarray: *const *mut f64, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f64, + Aarray: *const *const f64, + lda: i64, + xarray: *const *const f64, + incx: i64, + beta: *const f64, + yarray: *const *mut f64, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + xarray: *const *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + yarray: *const *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: i64, + xarray: *const *const cuda_types::cublas::cuComplex, + incx: i64, + beta: *const cuda_types::cublas::cuComplex, + yarray: *const *mut cuda_types::cublas::cuComplex, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + 
#[must_use] + fn cublasZgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + Aarray: *const *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + xarray: *const *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + yarray: *const *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + Aarray: *const *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + xarray: *const *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + yarray: *const *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSHgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + xarray: *const *const cuda_types::cublas::__half, + incx: ::core::ffi::c_int, + beta: *const f32, + yarray: *const *mut cuda_types::cublas::__half, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSHgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__half, + lda: i64, + xarray: *const *const 
cuda_types::cublas::__half, + incx: i64, + beta: *const f32, + yarray: *const *mut cuda_types::cublas::__half, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSSgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + xarray: *const *const cuda_types::cublas::__half, + incx: ::core::ffi::c_int, + beta: *const f32, + yarray: *const *mut f32, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSSgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__half, + lda: i64, + xarray: *const *const cuda_types::cublas::__half, + incx: i64, + beta: *const f32, + yarray: *const *mut f32, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSTgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__nv_bfloat16, + lda: ::core::ffi::c_int, + xarray: *const *const cuda_types::cublas::__nv_bfloat16, + incx: ::core::ffi::c_int, + beta: *const f32, + yarray: *const *mut cuda_types::cublas::__nv_bfloat16, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSTgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__nv_bfloat16, + lda: i64, + xarray: *const *const 
cuda_types::cublas::__nv_bfloat16, + incx: i64, + beta: *const f32, + yarray: *const *mut cuda_types::cublas::__nv_bfloat16, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSSgemvBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__nv_bfloat16, + lda: ::core::ffi::c_int, + xarray: *const *const cuda_types::cublas::__nv_bfloat16, + incx: ::core::ffi::c_int, + beta: *const f32, + yarray: *const *mut f32, + incy: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSSgemvBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + Aarray: *const *const cuda_types::cublas::__nv_bfloat16, + lda: i64, + xarray: *const *const cuda_types::cublas::__nv_bfloat16, + incx: i64, + beta: *const f32, + yarray: *const *mut f32, + incy: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const f32, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const f32, + incx: 
i64, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut f32, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const f64, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const f64, + y: *mut f64, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const f64, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const f64, + y: *mut f64, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: 
cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuComplex, + y: *mut cuda_types::cublas::cuComplex, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuDoubleComplex, + y: *mut cuda_types::cublas::cuDoubleComplex, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSHgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: 
::core::ffi::c_int, + alpha: *const f32, + A: *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__half, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut cuda_types::cublas::__half, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSHgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const cuda_types::cublas::__half, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__half, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut cuda_types::cublas::__half, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSSgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__half, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHSSgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const cuda_types::cublas::__half, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__half, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut f32, + incy: i64, + stridey: 
::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSTgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const cuda_types::cublas::__nv_bfloat16, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__nv_bfloat16, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut cuda_types::cublas::__nv_bfloat16, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSTgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const cuda_types::cublas::__nv_bfloat16, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__nv_bfloat16, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut cuda_types::cublas::__nv_bfloat16, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSSgemvStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const cuda_types::cublas::__nv_bfloat16, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__nv_bfloat16, + incx: ::core::ffi::c_int, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut f32, + incy: ::core::ffi::c_int, + stridey: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasTSSgemvStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + trans: 
cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const cuda_types::cublas::__nv_bfloat16, + lda: i64, + strideA: ::core::ffi::c_longlong, + x: *const cuda_types::cublas::__nv_bfloat16, + incx: i64, + stridex: ::core::ffi::c_longlong, + beta: *const f32, + y: *mut f32, + incy: i64, + stridey: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemm_v2( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + B: *const f32, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + B: *const f32, + ldb: i64, + beta: *const f32, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemm_v2( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + B: *const f64, + ldb: ::core::ffi::c_int, + beta: *const f64, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + 
B: *const f64, + ldb: i64, + beta: *const f64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm_v2( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3m( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3m_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: 
cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3mEx( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3mEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemm_v2( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const 
cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemm3m( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemm3m_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHgemm( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::__half, + A: *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::__half, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::__half, + C: *mut cuda_types::cublas::__half, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHgemm_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::__half, + A: *const cuda_types::cublas::__half, + lda: i64, + B: *const cuda_types::cublas::__half, + ldb: i64, + beta: *const cuda_types::cublas::__half, + C: *mut cuda_types::cublas::__half, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmEx( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f32, + A: *const 
::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: i64, + beta: *const f32, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmEx( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const ::core::ffi::c_void, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: ::core::ffi::c_int, + beta: *const ::core::ffi::c_void, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + computeType: cuda_types::cublas::cublasComputeType_t, + algo: cuda_types::cublas::cublasGemmAlgo_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const ::core::ffi::c_void, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: i64, + beta: *const ::core::ffi::c_void, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + computeType: cuda_types::cublas::cublasComputeType_t, + algo: cuda_types::cublas::cublasGemmAlgo_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemmEx( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: 
::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemmEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyrk_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + beta: *const f32, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyrk_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + beta: *const f32, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyrk_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: 
cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + beta: *const f64, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyrk_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + beta: *const f64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrk_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrk_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyrk_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: 
::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyrk_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrkEx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrkEx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrk3mEx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + beta: *const 
cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrk3mEx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherk_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + beta: *const f32, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherk_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + beta: *const f32, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZherk_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + beta: *const f64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn 
cublasZherk_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + beta: *const f64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherkEx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + beta: *const f32, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherkEx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + beta: *const f32, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherk3mEx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + beta: *const f32, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherk3mEx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + 
trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + beta: *const f32, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyr2k_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + B: *const f32, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyr2k_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + B: *const f32, + ldb: i64, + beta: *const f32, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyr2k_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + B: *const f64, + ldb: ::core::ffi::c_int, + beta: *const f64, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyr2k_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + B: *const f64, + ldb: i64, + beta: *const f64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyr2k_v2( + handle: 
cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyr2k_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyr2k_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyr2k_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + 
beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCher2k_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCher2k_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const f32, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZher2k_v2( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const f64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZher2k_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const 
cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const f64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyrkx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + B: *const f32, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsyrkx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + B: *const f32, + ldb: i64, + beta: *const f32, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyrkx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + B: *const f64, + ldb: ::core::ffi::c_int, + beta: *const f64, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsyrkx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + B: *const f64, + ldb: i64, + beta: *const f64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrkx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: 
cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsyrkx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyrkx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsyrkx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + 
ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherkx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCherkx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const f32, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZherkx( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const f64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZherkx_64( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, 
+ beta: *const f64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsymm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + B: *const f32, + ldb: ::core::ffi::c_int, + beta: *const f32, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSsymm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + B: *const f32, + ldb: i64, + beta: *const f32, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsymm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + B: *const f64, + ldb: ::core::ffi::c_int, + beta: *const f64, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDsymm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + B: *const f64, + ldb: i64, + beta: *const f64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsymm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + 
A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCsymm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsymm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZsymm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChemm_v2( + handle: cuda_types::cublas::cublasHandle_t, + 
side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasChemm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhemm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZhemm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, 
+ C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrsm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + B: *mut f32, + ldb: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrsm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + B: *mut f32, + ldb: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrsm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + B: *mut f64, + ldb: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrsm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + B: *mut f64, + ldb: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrsm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: 
cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *mut cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrsm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *mut cuda_types::cublas::cuComplex, + ldb: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrsm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *mut cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrsm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *mut cuda_types::cublas::cuDoubleComplex, + ldb: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn 
cublasStrmm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + B: *const f32, + ldb: ::core::ffi::c_int, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrmm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + B: *const f32, + ldb: i64, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrmm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + B: *const f64, + ldb: ::core::ffi::c_int, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrmm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + B: *const f64, + ldb: i64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrmm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: 
cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrmm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrmm_v2( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrmm_v2_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + 
alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHgemmBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::__half, + Aarray: *const *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + Barray: *const *const cuda_types::cublas::__half, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::__half, + Carray: *const *mut cuda_types::cublas::__half, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHgemmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::__half, + Aarray: *const *const cuda_types::cublas::__half, + lda: i64, + Barray: *const *const cuda_types::cublas::__half, + ldb: i64, + beta: *const cuda_types::cublas::__half, + Carray: *const *mut cuda_types::cublas::__half, + ldc: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + Aarray: *const *const f32, + lda: ::core::ffi::c_int, + Barray: *const *const f32, + ldb: ::core::ffi::c_int, + beta: *const f32, + Carray: *const *mut f32, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f32, + Aarray: *const *const f32, + lda: i64, + Barray: *const *const f32, + ldb: i64, + beta: *const f32, + Carray: *const *mut f32, + ldc: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemmBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + Aarray: *const *const f64, + lda: ::core::ffi::c_int, + Barray: *const *const f64, + ldb: ::core::ffi::c_int, + beta: *const f64, + Carray: *const *mut f64, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f64, + Aarray: *const *const f64, + lda: i64, + Barray: *const *const f64, + ldb: i64, + beta: *const f64, + Carray: *const *mut f64, + ldc: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemmBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + Barray: *const *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + Carray: *const *mut 
cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: i64, + Barray: *const *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + Carray: *const *mut cuda_types::cublas::cuComplex, + ldc: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3mBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + Barray: *const *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + Carray: *const *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3mBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: i64, + Barray: *const *const cuda_types::cublas::cuComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuComplex, + Carray: *const *mut cuda_types::cublas::cuComplex, + ldc: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemmBatched( + handle: 
cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + Aarray: *const *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + Barray: *const *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + Carray: *const *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + Aarray: *const *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + Barray: *const *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + Carray: *const *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasHgemmStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::__half, + A: *const cuda_types::cublas::__half, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::__half, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::__half, + C: *mut cuda_types::cublas::__half, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + 
#[must_use] + fn cublasHgemmStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::__half, + A: *const cuda_types::cublas::__half, + lda: i64, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::__half, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::__half, + C: *mut cuda_types::cublas::__half, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const f32, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const f32, + C: *mut f32, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + strideA: ::core::ffi::c_longlong, + B: *const f32, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const f32, + C: *mut f32, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemmStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: 
::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const f64, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const f64, + C: *mut f64, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemmStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + strideA: ::core::ffi::c_longlong, + B: *const f64, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const f64, + C: *mut f64, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemmStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemmStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + 
strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3mStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgemm3mStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuComplex, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemmStridedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: 
*const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgemmStridedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + strideA: ::core::ffi::c_longlong, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const cuda_types::cublas::cuDoubleComplex, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmBatchedEx( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const ::core::ffi::c_void, + Aarray: *const *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + Barray: *const *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: ::core::ffi::c_int, + beta: *const ::core::ffi::c_void, + Carray: *const *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + computeType: cuda_types::cublas::cublasComputeType_t, + algo: cuda_types::cublas::cublasGemmAlgo_t, + 
) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmBatchedEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const ::core::ffi::c_void, + Aarray: *const *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + Barray: *const *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: i64, + beta: *const ::core::ffi::c_void, + Carray: *const *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + batchCount: i64, + computeType: cuda_types::cublas::cublasComputeType_t, + algo: cuda_types::cublas::cublasGemmAlgo_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmStridedBatchedEx( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + alpha: *const ::core::ffi::c_void, + A: *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: ::core::ffi::c_int, + strideA: ::core::ffi::c_longlong, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: ::core::ffi::c_int, + strideB: ::core::ffi::c_longlong, + beta: *const ::core::ffi::c_void, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: ::core::ffi::c_int, + strideC: ::core::ffi::c_longlong, + batchCount: ::core::ffi::c_int, + computeType: cuda_types::cublas::cublasComputeType_t, + algo: cuda_types::cublas::cublasGemmAlgo_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmStridedBatchedEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + k: i64, + alpha: *const ::core::ffi::c_void, + A: *const 
::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType, + lda: i64, + strideA: ::core::ffi::c_longlong, + B: *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType, + ldb: i64, + strideB: ::core::ffi::c_longlong, + beta: *const ::core::ffi::c_void, + C: *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType, + ldc: i64, + strideC: ::core::ffi::c_longlong, + batchCount: i64, + computeType: cuda_types::cublas::cublasComputeType_t, + algo: cuda_types::cublas::cublasGemmAlgo_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmGroupedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa_array: *const cuda_types::cublas::cublasOperation_t, + transb_array: *const cuda_types::cublas::cublasOperation_t, + m_array: *const ::core::ffi::c_int, + n_array: *const ::core::ffi::c_int, + k_array: *const ::core::ffi::c_int, + alpha_array: *const f32, + Aarray: *const *const f32, + lda_array: *const ::core::ffi::c_int, + Barray: *const *const f32, + ldb_array: *const ::core::ffi::c_int, + beta_array: *const f32, + Carray: *const *mut f32, + ldc_array: *const ::core::ffi::c_int, + group_count: ::core::ffi::c_int, + group_size: *const ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgemmGroupedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa_array: *const cuda_types::cublas::cublasOperation_t, + transb_array: *const cuda_types::cublas::cublasOperation_t, + m_array: *const i64, + n_array: *const i64, + k_array: *const i64, + alpha_array: *const f32, + Aarray: *const *const f32, + lda_array: *const i64, + Barray: *const *const f32, + ldb_array: *const i64, + beta_array: *const f32, + Carray: *const *mut f32, + ldc_array: *const i64, + group_count: i64, + group_size: *const i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemmGroupedBatched( + handle: cuda_types::cublas::cublasHandle_t, + transa_array: *const 
cuda_types::cublas::cublasOperation_t, + transb_array: *const cuda_types::cublas::cublasOperation_t, + m_array: *const ::core::ffi::c_int, + n_array: *const ::core::ffi::c_int, + k_array: *const ::core::ffi::c_int, + alpha_array: *const f64, + Aarray: *const *const f64, + lda_array: *const ::core::ffi::c_int, + Barray: *const *const f64, + ldb_array: *const ::core::ffi::c_int, + beta_array: *const f64, + Carray: *const *mut f64, + ldc_array: *const ::core::ffi::c_int, + group_count: ::core::ffi::c_int, + group_size: *const ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgemmGroupedBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + transa_array: *const cuda_types::cublas::cublasOperation_t, + transb_array: *const cuda_types::cublas::cublasOperation_t, + m_array: *const i64, + n_array: *const i64, + k_array: *const i64, + alpha_array: *const f64, + Aarray: *const *const f64, + lda_array: *const i64, + Barray: *const *const f64, + ldb_array: *const i64, + beta_array: *const f64, + Carray: *const *mut f64, + ldc_array: *const i64, + group_count: i64, + group_size: *const i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmGroupedBatchedEx( + handle: cuda_types::cublas::cublasHandle_t, + transa_array: *const cuda_types::cublas::cublasOperation_t, + transb_array: *const cuda_types::cublas::cublasOperation_t, + m_array: *const ::core::ffi::c_int, + n_array: *const ::core::ffi::c_int, + k_array: *const ::core::ffi::c_int, + alpha_array: *const ::core::ffi::c_void, + Aarray: *const *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType_t, + lda_array: *const ::core::ffi::c_int, + Barray: *const *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType_t, + ldb_array: *const ::core::ffi::c_int, + beta_array: *const ::core::ffi::c_void, + Carray: *const *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType_t, + ldc_array: *const ::core::ffi::c_int, + 
group_count: ::core::ffi::c_int, + group_size: *const ::core::ffi::c_int, + computeType: cuda_types::cublas::cublasComputeType_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasGemmGroupedBatchedEx_64( + handle: cuda_types::cublas::cublasHandle_t, + transa_array: *const cuda_types::cublas::cublasOperation_t, + transb_array: *const cuda_types::cublas::cublasOperation_t, + m_array: *const i64, + n_array: *const i64, + k_array: *const i64, + alpha_array: *const ::core::ffi::c_void, + Aarray: *const *const ::core::ffi::c_void, + Atype: cuda_types::cublas::cudaDataType_t, + lda_array: *const i64, + Barray: *const *const ::core::ffi::c_void, + Btype: cuda_types::cublas::cudaDataType_t, + ldb_array: *const i64, + beta_array: *const ::core::ffi::c_void, + Carray: *const *mut ::core::ffi::c_void, + Ctype: cuda_types::cublas::cudaDataType_t, + ldc_array: *const i64, + group_count: i64, + group_size: *const i64, + computeType: cuda_types::cublas::cublasComputeType_t, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgeam( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const f32, + lda: ::core::ffi::c_int, + beta: *const f32, + B: *const f32, + ldb: ::core::ffi::c_int, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgeam_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const f32, + lda: i64, + beta: *const f32, + B: *const f32, + ldb: i64, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgeam( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: 
cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const f64, + lda: ::core::ffi::c_int, + beta: *const f64, + B: *const f64, + ldb: ::core::ffi::c_int, + C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgeam_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const f64, + lda: i64, + beta: *const f64, + B: *const f64, + ldb: i64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgeam( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuComplex, + B: *const cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgeam_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + beta: *const cuda_types::cublas::cuComplex, + B: *const cuda_types::cublas::cuComplex, + ldb: i64, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgeam( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const 
cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + beta: *const cuda_types::cublas::cuDoubleComplex, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgeam_64( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + beta: *const cuda_types::cublas::cuDoubleComplex, + B: *const cuda_types::cublas::cuDoubleComplex, + ldb: i64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrsmBatched( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f32, + A: *const *const f32, + lda: ::core::ffi::c_int, + B: *const *mut f32, + ldb: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrsmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const f32, + A: *const *const f32, + lda: i64, + B: *const *mut f32, + ldb: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrsmBatched( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: 
cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const f64, + A: *const *const f64, + lda: ::core::ffi::c_int, + B: *const *mut f64, + ldb: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrsmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const f64, + A: *const *const f64, + lda: i64, + B: *const *mut f64, + ldb: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrsmBatched( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuComplex, + A: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + B: *const *mut cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrsmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuComplex, + A: *const *const cuda_types::cublas::cuComplex, + lda: i64, + B: *const *mut cuda_types::cublas::cuComplex, + ldb: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrsmBatched( + handle: 
cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + B: *const *mut cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + batchCount: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrsmBatched_64( + handle: cuda_types::cublas::cublasHandle_t, + side: cuda_types::cublas::cublasSideMode_t, + uplo: cuda_types::cublas::cublasFillMode_t, + trans: cuda_types::cublas::cublasOperation_t, + diag: cuda_types::cublas::cublasDiagType_t, + m: i64, + n: i64, + alpha: *const cuda_types::cublas::cuDoubleComplex, + A: *const *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + B: *const *mut cuda_types::cublas::cuDoubleComplex, + ldb: i64, + batchCount: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSdgmm( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + A: *const f32, + lda: ::core::ffi::c_int, + x: *const f32, + incx: ::core::ffi::c_int, + C: *mut f32, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSdgmm_64( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: i64, + n: i64, + A: *const f32, + lda: i64, + x: *const f32, + incx: i64, + C: *mut f32, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDdgmm( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + A: *const f64, + lda: ::core::ffi::c_int, + x: *const f64, + incx: ::core::ffi::c_int, + 
C: *mut f64, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDdgmm_64( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: i64, + n: i64, + A: *const f64, + lda: i64, + x: *const f64, + incx: i64, + C: *mut f64, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCdgmm( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuComplex, + incx: ::core::ffi::c_int, + C: *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCdgmm_64( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: i64, + n: i64, + A: *const cuda_types::cublas::cuComplex, + lda: i64, + x: *const cuda_types::cublas::cuComplex, + incx: i64, + C: *mut cuda_types::cublas::cuComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdgmm( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: ::core::ffi::c_int, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZdgmm_64( + handle: cuda_types::cublas::cublasHandle_t, + mode: cuda_types::cublas::cublasSideMode_t, + m: i64, + n: i64, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: i64, + x: *const cuda_types::cublas::cuDoubleComplex, + incx: i64, + C: *mut cuda_types::cublas::cuDoubleComplex, + ldc: i64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSmatinvBatched( + 
handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const f32, + lda: ::core::ffi::c_int, + Ainv: *const *mut f32, + lda_inv: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDmatinvBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const f64, + lda: ::core::ffi::c_int, + Ainv: *const *mut f64, + lda_inv: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCmatinvBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + Ainv: *const *mut cuda_types::cublas::cuComplex, + lda_inv: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZmatinvBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + Ainv: *const *mut cuda_types::cublas::cuDoubleComplex, + lda_inv: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgeqrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + Aarray: *const *mut f32, + lda: ::core::ffi::c_int, + TauArray: *const *mut f32, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgeqrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + Aarray: *const *mut f64, + lda: ::core::ffi::c_int, + TauArray: *const *mut f64, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + 
) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgeqrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + Aarray: *const *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + TauArray: *const *mut cuda_types::cublas::cuComplex, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgeqrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + Aarray: *const *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + TauArray: *const *mut cuda_types::cublas::cuDoubleComplex, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgelsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *mut f32, + lda: ::core::ffi::c_int, + Carray: *const *mut f32, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + devInfoArray: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgelsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *mut f64, + lda: ::core::ffi::c_int, + Carray: *const *mut f64, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + devInfoArray: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgelsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *mut 
cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + Carray: *const *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + devInfoArray: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgelsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + Carray: *const *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + devInfoArray: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStpttr( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + AP: *const f32, + A: *mut f32, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtpttr( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + AP: *const f64, + A: *mut f64, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtpttr( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + AP: *const cuda_types::cublas::cuComplex, + A: *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtpttr( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + AP: *const cuda_types::cublas::cuDoubleComplex, + A: *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasStrttp( + handle: 
cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + A: *const f32, + lda: ::core::ffi::c_int, + AP: *mut f32, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDtrttp( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + A: *const f64, + lda: ::core::ffi::c_int, + AP: *mut f64, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCtrttp( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + AP: *mut cuda_types::cublas::cuComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZtrttp( + handle: cuda_types::cublas::cublasHandle_t, + uplo: cuda_types::cublas::cublasFillMode_t, + n: ::core::ffi::c_int, + A: *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + AP: *mut cuda_types::cublas::cuDoubleComplex, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgetrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *mut f32, + lda: ::core::ffi::c_int, + P: *mut ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgetrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *mut f64, + lda: ::core::ffi::c_int, + P: *mut ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgetrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *mut cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + P: *mut ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> 
cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgetrfBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *mut cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + P: *mut ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgetriBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const f32, + lda: ::core::ffi::c_int, + P: *const ::core::ffi::c_int, + C: *const *mut f32, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgetriBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const f64, + lda: ::core::ffi::c_int, + P: *const ::core::ffi::c_int, + C: *const *mut f64, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgetriBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + P: *const ::core::ffi::c_int, + C: *const *mut cuda_types::cublas::cuComplex, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgetriBatched( + handle: cuda_types::cublas::cublasHandle_t, + n: ::core::ffi::c_int, + A: *const *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + P: *const ::core::ffi::c_int, + C: *const *mut cuda_types::cublas::cuDoubleComplex, + ldc: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasSgetrsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: 
cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *const f32, + lda: ::core::ffi::c_int, + devIpiv: *const ::core::ffi::c_int, + Barray: *const *mut f32, + ldb: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasDgetrsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *const f64, + lda: ::core::ffi::c_int, + devIpiv: *const ::core::ffi::c_int, + Barray: *const *mut f64, + ldb: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasCgetrsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *const cuda_types::cublas::cuComplex, + lda: ::core::ffi::c_int, + devIpiv: *const ::core::ffi::c_int, + Barray: *const *mut cuda_types::cublas::cuComplex, + ldb: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasZgetrsBatched( + handle: cuda_types::cublas::cublasHandle_t, + trans: cuda_types::cublas::cublasOperation_t, + n: ::core::ffi::c_int, + nrhs: ::core::ffi::c_int, + Aarray: *const *const cuda_types::cublas::cuDoubleComplex, + lda: ::core::ffi::c_int, + devIpiv: *const ::core::ffi::c_int, + Barray: *const *mut cuda_types::cublas::cuDoubleComplex, + ldb: ::core::ffi::c_int, + info: *mut ::core::ffi::c_int, + batchSize: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; + #[must_use] + fn cublasUint8gemmBias( + handle: cuda_types::cublas::cublasHandle_t, + transa: cuda_types::cublas::cublasOperation_t, + transb: cuda_types::cublas::cublasOperation_t, + transc: 
cuda_types::cublas::cublasOperation_t, + m: ::core::ffi::c_int, + n: ::core::ffi::c_int, + k: ::core::ffi::c_int, + A: *const ::core::ffi::c_uchar, + A_bias: ::core::ffi::c_int, + lda: ::core::ffi::c_int, + B: *const ::core::ffi::c_uchar, + B_bias: ::core::ffi::c_int, + ldb: ::core::ffi::c_int, + C: *mut ::core::ffi::c_uchar, + C_bias: ::core::ffi::c_int, + ldc: ::core::ffi::c_int, + C_mult: ::core::ffi::c_int, + C_shift: ::core::ffi::c_int, + ) -> cuda_types::cublas::cublasStatus_t; +} diff --git a/cuda_base/src/cublaslt.rs b/cuda_base/src/cublaslt.rs new file mode 100644 index 0000000..b18cc1c --- /dev/null +++ b/cuda_base/src/cublaslt.rs @@ -0,0 +1,581 @@ +// Generated automatically by zluda_bindgen +// DO NOT EDIT MANUALLY +#![allow(warnings)] +extern "system" { + #[must_use] + fn cublasLtCreate( + lightHandle: *mut cuda_types::cublaslt::cublasLtHandle_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + fn cublasLtDestroy( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + fn cublasLtGetStatusName( + status: cuda_types::cublaslt::cublasStatus_t, + ) -> *const ::core::ffi::c_char; + fn cublasLtGetStatusString( + status: cuda_types::cublaslt::cublasStatus_t, + ) -> *const ::core::ffi::c_char; + fn cublasLtGetVersion() -> usize; + fn cublasLtGetCudartVersion() -> usize; + #[must_use] + fn cublasLtGetProperty( + type_: cuda_types::cublaslt::libraryPropertyType, + value: *mut ::core::ffi::c_int, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + fn cublasLtHeuristicsCacheGetCapacity( + capacity: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + fn cublasLtHeuristicsCacheSetCapacity( + capacity: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + /** Restricts usage of CPU instructions (ISA) specified by the flags in the mask. + + Flags can be combined with bitwise OR(|) operator. 
Supported flags: + - 0x1 -- x86-64 AVX512 ISA + + Default mask: 0 (any applicable ISA is allowed). + + The function returns the previous value of the mask. + The function takes precedence over the environment variable CUBLASLT_DISABLE_CPU_INSTRUCTIONS_MASK.*/ + fn cublasLtDisableCpuInstructionsSetMask( + mask: ::core::ffi::c_uint, + ) -> ::core::ffi::c_uint; + #[must_use] + /** Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C). + + \retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized + \retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g. + when workspaceSizeInBytes is less than workspace required by configured + algo + \retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured + operation + \retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device + \retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device + \retval CUBLAS_STATUS_SUCCESS if the operation completed successfully*/ + fn cublasLtMatmul( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + computeDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + alpha: *const ::core::ffi::c_void, + A: *const ::core::ffi::c_void, + Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + B: *const ::core::ffi::c_void, + Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + beta: *const ::core::ffi::c_void, + C: *const ::core::ffi::c_void, + Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + D: *mut ::core::ffi::c_void, + Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t, + workspace: *mut ::core::ffi::c_void, + workspaceSizeInBytes: usize, + stream: cuda_types::cublaslt::cudaStream_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B)) + + Can be used to 
change memory order of data or to scale and shift the values. + + \retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized + \retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g. + when A is not NULL, but Adesc is NULL + \retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured + operation + \retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device + \retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device + \retval CUBLAS_STATUS_SUCCESS if the operation completed successfully*/ + fn cublasLtMatrixTransform( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t, + alpha: *const ::core::ffi::c_void, + A: *const ::core::ffi::c_void, + Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + beta: *const ::core::ffi::c_void, + B: *const ::core::ffi::c_void, + Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + C: *mut ::core::ffi::c_void, + Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + stream: cuda_types::cublaslt::cudaStream_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /// Internal. Do not use directly. + fn cublasLtMatrixLayoutInit_internal( + matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t, + size: usize, + type_: cuda_types::cublaslt::cudaDataType, + rows: u64, + cols: u64, + ld: i64, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Create new matrix layout descriptor. 
+ + \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated + \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/ + fn cublasLtMatrixLayoutCreate( + matLayout: *mut cuda_types::cublaslt::cublasLtMatrixLayout_t, + type_: cuda_types::cublaslt::cudaDataType, + rows: u64, + cols: u64, + ld: i64, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Destroy matrix layout descriptor. + + \retval CUBLAS_STATUS_SUCCESS if operation was successful*/ + fn cublasLtMatrixLayoutDestroy( + matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Set matrix layout descriptor attribute. + + \param[in] matLayout The descriptor + \param[in] attr The attribute + \param[in] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + + \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/ + fn cublasLtMatrixLayoutSetAttribute( + matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t, + attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t, + buf: *const ::core::ffi::c_void, + sizeInBytes: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Get matrix layout descriptor attribute. + + \param[in] matLayout The descriptor + \param[in] attr The attribute + \param[out] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
If sizeInBytes is non-zero: number of + bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents + + \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero + and buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/ + fn cublasLtMatrixLayoutGetAttribute( + matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t, + attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t, + buf: *mut ::core::ffi::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /// Internal. Do not use directly. + fn cublasLtMatmulDescInit_internal( + matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + size: usize, + computeType: cuda_types::cublaslt::cublasComputeType_t, + scaleType: cuda_types::cublaslt::cudaDataType_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Create new matmul operation descriptor. + + \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated + \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/ + fn cublasLtMatmulDescCreate( + matmulDesc: *mut cuda_types::cublaslt::cublasLtMatmulDesc_t, + computeType: cuda_types::cublaslt::cublasComputeType_t, + scaleType: cuda_types::cublaslt::cudaDataType_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Destroy matmul operation descriptor. + + \retval CUBLAS_STATUS_SUCCESS if operation was successful*/ + fn cublasLtMatmulDescDestroy( + matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Set matmul operation descriptor attribute. 
+ + \param[in] matmulDesc The descriptor + \param[in] attr The attribute + \param[in] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + + \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/ + fn cublasLtMatmulDescSetAttribute( + matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t, + buf: *const ::core::ffi::c_void, + sizeInBytes: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Get matmul operation descriptor attribute. + + \param[in] matmulDesc The descriptor + \param[in] attr The attribute + \param[out] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of + bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents + + \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero + and buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/ + fn cublasLtMatmulDescGetAttribute( + matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t, + buf: *mut ::core::ffi::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /// Internal. Do not use directly. 
+ fn cublasLtMatrixTransformDescInit_internal( + transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t, + size: usize, + scaleType: cuda_types::cublaslt::cudaDataType, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Create new matrix transform operation descriptor. + + \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated + \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/ + fn cublasLtMatrixTransformDescCreate( + transformDesc: *mut cuda_types::cublaslt::cublasLtMatrixTransformDesc_t, + scaleType: cuda_types::cublaslt::cudaDataType, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Destroy matrix transform operation descriptor. + + \retval CUBLAS_STATUS_SUCCESS if operation was successful*/ + fn cublasLtMatrixTransformDescDestroy( + transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Set matrix transform operation descriptor attribute. + + \param[in] transformDesc The descriptor + \param[in] attr The attribute + \param[in] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + + \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/ + fn cublasLtMatrixTransformDescSetAttribute( + transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t, + attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t, + buf: *const ::core::ffi::c_void, + sizeInBytes: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Get matrix transform operation descriptor attribute. 
+ + \param[in] transformDesc The descriptor + \param[in] attr The attribute + \param[out] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number + of bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents + + \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero + and buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/ + fn cublasLtMatrixTransformDescGetAttribute( + transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t, + attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t, + buf: *mut ::core::ffi::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /// Internal. Do not use directly. + fn cublasLtMatmulPreferenceInit_internal( + pref: cuda_types::cublaslt::cublasLtMatmulPreference_t, + size: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Create new matmul heuristic search preference descriptor. + + \retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated + \retval CUBLAS_STATUS_SUCCESS if desciptor was created successfully*/ + fn cublasLtMatmulPreferenceCreate( + pref: *mut cuda_types::cublaslt::cublasLtMatmulPreference_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Destroy matmul heuristic search preference descriptor. + + \retval CUBLAS_STATUS_SUCCESS if operation was successful*/ + fn cublasLtMatmulPreferenceDestroy( + pref: cuda_types::cublaslt::cublasLtMatmulPreference_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Set matmul heuristic search preference descriptor attribute. 
+ + \param[in] pref The descriptor + \param[in] attr The attribute + \param[in] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + + \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/ + fn cublasLtMatmulPreferenceSetAttribute( + pref: cuda_types::cublaslt::cublasLtMatmulPreference_t, + attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t, + buf: *const ::core::ffi::c_void, + sizeInBytes: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Get matmul heuristic search preference descriptor attribute. + + \param[in] pref The descriptor + \param[in] attr The attribute + \param[out] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of + bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents + + \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero + and buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/ + fn cublasLtMatmulPreferenceGetAttribute( + pref: cuda_types::cublaslt::cublasLtMatmulPreference_t, + attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t, + buf: *mut ::core::ffi::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Query cublasLt heuristic for algorithm appropriate for given use case. + + \param[in] lightHandle Pointer to the allocated cuBLASLt handle for the cuBLASLt + context. See cublasLtHandle_t. 
+ \param[in] operationDesc Handle to the matrix multiplication descriptor. + \param[in] Adesc Handle to the layout descriptors for matrix A. + \param[in] Bdesc Handle to the layout descriptors for matrix B. + \param[in] Cdesc Handle to the layout descriptors for matrix C. + \param[in] Ddesc Handle to the layout descriptors for matrix D. + \param[in] preference Pointer to the structure holding the heuristic search + preferences descriptor. See cublasLtMatrixLayout_t. + \param[in] requestedAlgoCount Size of heuristicResultsArray (in elements) and requested + maximum number of algorithms to return. + \param[in, out] heuristicResultsArray Output algorithms and associated runtime characteristics, + ordered in increasing estimated compute time. + \param[out] returnAlgoCount The number of heuristicResultsArray elements written. + + \retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero + \retval CUBLAS_STATUS_NOT_SUPPORTED if no heuristic function available for current configuration + \retval CUBLAS_STATUS_SUCCESS if query was successful, inspect + heuristicResultsArray[0 to (returnAlgoCount - 1)].state + for detail status of results*/ + fn cublasLtMatmulAlgoGetHeuristic( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + operationDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + preference: cuda_types::cublaslt::cublasLtMatmulPreference_t, + requestedAlgoCount: ::core::ffi::c_int, + heuristicResultsArray: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t, + returnAlgoCount: *mut ::core::ffi::c_int, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Routine to get all algo IDs that can potentially run + + \param[in] int requestedAlgoCount requested number of algos (must be less or equal to size of 
algoIdsA + (in elements)) \param[out] algoIdsA array to write algoIds to \param[out] returnAlgoCount number of algoIds + actually written + + \retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero + \retval CUBLAS_STATUS_SUCCESS if query was successful, inspect returnAlgoCount to get actual number of IDs + available*/ + fn cublasLtMatmulAlgoGetIds( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + computeType: cuda_types::cublaslt::cublasComputeType_t, + scaleType: cuda_types::cublaslt::cudaDataType_t, + Atype: cuda_types::cublaslt::cudaDataType_t, + Btype: cuda_types::cublaslt::cudaDataType_t, + Ctype: cuda_types::cublaslt::cudaDataType_t, + Dtype: cuda_types::cublaslt::cudaDataType_t, + requestedAlgoCount: ::core::ffi::c_int, + algoIdsArray: *mut ::core::ffi::c_int, + returnAlgoCount: *mut ::core::ffi::c_int, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Initialize algo structure + + \retval CUBLAS_STATUS_INVALID_VALUE if algo is NULL or algoId is outside of recognized range + \retval CUBLAS_STATUS_NOT_SUPPORTED if algoId is not supported for given combination of data types + \retval CUBLAS_STATUS_SUCCESS if the structure was successfully initialized*/ + fn cublasLtMatmulAlgoInit( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + computeType: cuda_types::cublaslt::cublasComputeType_t, + scaleType: cuda_types::cublaslt::cudaDataType_t, + Atype: cuda_types::cublaslt::cudaDataType_t, + Btype: cuda_types::cublaslt::cudaDataType_t, + Ctype: cuda_types::cublaslt::cudaDataType_t, + Dtype: cuda_types::cublaslt::cudaDataType_t, + algoId: ::core::ffi::c_int, + algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Check configured algo descriptor for correctness and support on current device. + + Result includes required workspace size and calculated wave count. + + CUBLAS_STATUS_SUCCESS doesn't fully guarantee algo will run (will fail if e.g. 
buffers are not correctly aligned); + but if cublasLtMatmulAlgoCheck fails, the algo will not run. + + \param[in] algo algo configuration to check + \param[out] result result structure to report algo runtime characteristics; algo field is never updated + + \retval CUBLAS_STATUS_INVALID_VALUE if matrix layout descriptors or operation descriptor don't match algo + descriptor + \retval CUBLAS_STATUS_NOT_SUPPORTED if algo configuration or data type combination is not currently supported on + given device + \retval CUBLAS_STATUS_ARCH_MISMATCH if algo configuration cannot be run using the selected device + \retval CUBLAS_STATUS_SUCCESS if check was successful*/ + fn cublasLtMatmulAlgoCheck( + lightHandle: cuda_types::cublaslt::cublasLtHandle_t, + operationDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t, + Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t, + algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t, + result: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Get algo capability attribute. + + E.g. to get list of supported Tile IDs: + cublasLtMatmulTile_t tiles[CUBLASLT_MATMUL_TILE_END]; + size_t num_tiles, size_written; + if (cublasLtMatmulAlgoCapGetAttribute(algo, CUBLASLT_ALGO_CAP_TILE_IDS, tiles, sizeof(tiles), size_written) == + CUBLAS_STATUS_SUCCESS) { num_tiles = size_written / sizeof(tiles[0]); + } + + \param[in] algo The algo descriptor + \param[in] attr The attribute + \param[out] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
If sizeInBytes is non-zero: number of + bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents + + \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero + and buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/ + fn cublasLtMatmulAlgoCapGetAttribute( + algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t, + attr: cuda_types::cublaslt::cublasLtMatmulAlgoCapAttributes_t, + buf: *mut ::core::ffi::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Set algo configuration attribute. + + \param[in] algo The algo descriptor + \param[in] attr The attribute + \param[in] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + + \retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/ + fn cublasLtMatmulAlgoConfigSetAttribute( + algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t, + attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t, + buf: *const ::core::ffi::c_void, + sizeInBytes: usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Get algo configuration attribute. + + \param[in] algo The algo descriptor + \param[in] attr The attribute + \param[out] buf memory address containing the new value + \param[in] sizeInBytes size of buf buffer for verification (in bytes) + \param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. 
If sizeInBytes is non-zero: number of + bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents + + \retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero + and buf is NULL or sizeInBytes doesn't match size of internal storage for + selected attribute + \retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/ + fn cublasLtMatmulAlgoConfigGetAttribute( + algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t, + attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t, + buf: *mut ::core::ffi::c_void, + sizeInBytes: usize, + sizeWritten: *mut usize, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Experimental: Logger callback setter. + + \param[in] callback a user defined callback function to be called by the logger + + \retval CUBLAS_STATUS_SUCCESS if callback was set successfully*/ + fn cublasLtLoggerSetCallback( + callback: cuda_types::cublaslt::cublasLtLoggerCallback_t, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Experimental: Log file setter. + + \param[in] file an open file with write permissions + + \retval CUBLAS_STATUS_SUCCESS if log file was set successfully*/ + fn cublasLtLoggerSetFile(file: *mut FILE) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Experimental: Open log file. + + \param[in] logFile log file path. if the log file does not exist, it will be created + + \retval CUBLAS_STATUS_SUCCESS if log file was created successfully*/ + fn cublasLtLoggerOpenFile( + logFile: *const ::core::ffi::c_char, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Experimental: Log level setter. + + \param[in] level log level, should be one of the following: + 0. Off + 1. Errors + 2. Performance Trace + 3. Performance Hints + 4. Heuristics Trace + 5. 
API Trace + + \retval CUBLAS_STATUS_INVALID_VALUE if log level is not one of the above levels + + \retval CUBLAS_STATUS_SUCCESS if log level was set successfully*/ + fn cublasLtLoggerSetLevel( + level: ::core::ffi::c_int, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Experimental: Log mask setter. + + \param[in] mask log mask, should be a combination of the following masks: + 0. Off + 1. Errors + 2. Performance Trace + 4. Performance Hints + 8. Heuristics Trace + 16. API Trace + + \retval CUBLAS_STATUS_SUCCESS if log mask was set successfully*/ + fn cublasLtLoggerSetMask( + mask: ::core::ffi::c_int, + ) -> cuda_types::cublaslt::cublasStatus_t; + #[must_use] + /** Experimental: Disable logging for the entire session. + + \retval CUBLAS_STATUS_SUCCESS if disabled logging*/ + fn cublasLtLoggerForceDisable() -> cuda_types::cublaslt::cublasStatus_t; +} diff --git a/cuda_base/src/cuda.rs b/cuda_base/src/cuda.rs index 37aadf1..a53f6a9 100644 --- a/cuda_base/src/cuda.rs +++ b/cuda_base/src/cuda.rs @@ -521,6 +521,12 @@ extern "system" { - ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING: GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::CUGPUDirectRDMAWritesOrdering for the numerical values returned here. - ::CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES: Bitmask of handle types supported with mempool based IPC - ::CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED: Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays. + - ::CU_DEVICE_ATTRIBUTE_NUMA_CONFIG: NUMA configuration of a device: value is of type ::CUdeviceNumaConfig enum + - ::CU_DEVICE_ATTRIBUTE_NUMA_ID: NUMA node ID of the GPU memory + - ::CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED: Device supports switch multicast and reduction operations. + - ::CU_DEVICE_ATTRIBUTE_GPU_PCI_DEVICE_ID: The combined 16-bit PCI device ID and 16-bit PCI vendor ID. 
+ - ::CU_DEVICE_ATTRIBUTE_GPU_PCI_SUBSYSTEM_ID: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID. +ID. \param pi - Returned device attribute value \param attrib - Device attribute to query @@ -710,6 +716,15 @@ extern "system" { determined by comparing the numerical values between the two enums, with smaller scopes having smaller values. + On platforms that support GPUDirect RDMA writes via more than one path in + hardware (see ::CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE), the user should + consider those paths as belonging to separate ordering domains. Note that in + such cases CUDA driver will report both RDMA writes ordering and RDMA write + scope as ALL_DEVICES and a call to cuFlushGPUDirectRDMA will be a no-op, + but when these multiple paths are used simultaneously, it is the user's + responsibility to ensure ordering by using mechanisms outside the scope of + CUDA. + Users may query support for this API via ::CU_DEVICE_ATTRIBUTE_FLUSH_FLUSH_GPU_DIRECT_RDMA_OPTIONS. @@ -1348,6 +1363,163 @@ int textureAlign flags: ::core::ffi::c_uint, dev: cuda_types::cuda::CUdevice, ) -> cuda_types::cuda::CUresult; + /** \brief Create a CUDA context + + Creates a new CUDA context and associates it with the calling thread. The + \p flags parameter is described below. The context is created with a usage + count of 1 and the caller of ::cuCtxCreate() must call ::cuCtxDestroy() + when done using the context. If a context is already current to the thread, + it is supplanted by the newly created context and may be restored by a subsequent + call to ::cuCtxPopCurrent(). + + CUDA context can be created with execution affinity. The type and the amount of +execution resource the context can use is limited by \p paramsArray and \p numExecAffinityParams +in \p execAffinity. The \p paramsArray is an array of \p CUexecAffinityParam and the \p numExecAffinityParams + describes the size of the paramsArray. 
If two \p CUexecAffinityParam in the array have the same type, + the latter execution affinity parameter overrides the former execution affinity parameter. + The supported execution affinity types are: + - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs that the context can use. The portion + of SMs is specified as the number of SMs via \p CUexecAffinitySmCount. This limit will be internally + rounded up to the next hardware-supported amount. Hence, it is imperative to query the actual execution + affinity of the context via \p cuCtxGetExecAffinity after context creation. Currently, this attribute + is only supported under Volta+ MPS. + + CUDA context can be created in CIG(CUDA in Graphics) mode by setting \p cigParams. + Data from graphics client is shared with CUDA via the \p sharedData in \p cigParams. + Support for D3D12 graphics client can be determined using ::cuDeviceGetAttribute() with + ::CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED. \p sharedData is a ID3D12CommandQueue handle. + Either \p execAffinityParams or \p cigParams can be set to a non-null value. Setting both to a + non-null value will result in an undefined behavior. + + The three LSBs of the \p flags parameter can be used to control how the OS + thread, which owns the CUDA context at the time of an API call, interacts + with the OS scheduler when waiting for results from the GPU. Only one of + the scheduling flags can be set when creating a context. + + - ::CU_CTX_SCHED_SPIN: Instruct CUDA to actively spin when waiting for + results from the GPU. This can decrease latency when waiting for the GPU, + but may lower the performance of CPU threads if they are performing work in + parallel with the CUDA thread. + + - ::CU_CTX_SCHED_YIELD: Instruct CUDA to yield its thread when waiting for + results from the GPU. This can increase latency when waiting for the GPU, + but can increase the performance of CPU threads performing work in parallel + with the GPU. 
+ + - ::CU_CTX_SCHED_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work. + + - ::CU_CTX_BLOCKING_SYNC: Instruct CUDA to block the CPU thread on a + synchronization primitive when waiting for the GPU to finish work.
+ Deprecated: This flag was deprecated as of CUDA 4.0 and was + replaced with ::CU_CTX_SCHED_BLOCKING_SYNC. + + - ::CU_CTX_SCHED_AUTO: The default value if the \p flags parameter is zero, + uses a heuristic based on the number of active CUDA contexts in the + process \e C and the number of logical processors in the system \e P. If + \e C > \e P, then CUDA will yield to other OS threads when waiting for + the GPU (::CU_CTX_SCHED_YIELD), otherwise CUDA will not yield while + waiting for results and actively spin on the processor (::CU_CTX_SCHED_SPIN). + Additionally, on Tegra devices, ::CU_CTX_SCHED_AUTO uses a heuristic based on + the power profile of the platform and may choose ::CU_CTX_SCHED_BLOCKING_SYNC + for low-powered devices. + + - ::CU_CTX_MAP_HOST: Instruct CUDA to support mapped pinned allocations. + This flag must be set in order to allocate pinned host memory that is + accessible to the GPU. + + - ::CU_CTX_LMEM_RESIZE_TO_MAX: Instruct CUDA to not reduce local memory + after resizing local memory for a kernel. This can prevent thrashing by + local memory allocations when launching many kernels with high local + memory usage at the cost of potentially increased memory usage.
+ Deprecated: This flag is deprecated and the behavior enabled + by this flag is now the default and cannot be disabled. + Instead, the per-thread stack size can be controlled with ::cuCtxSetLimit(). + + - ::CU_CTX_COREDUMP_ENABLE: If GPU coredumps have not been enabled globally + with ::cuCoredumpSetAttributeGlobal or environment variables, this flag can + be set during context creation to instruct CUDA to create a coredump if + this context raises an exception during execution. These environment variables + are described in the CUDA-GDB user guide under the "GPU core dump support" + section. + The initial attributes will be taken from the global attributes at the time of + context creation. The other attributes that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. This flag is not supported when CUDA context is created in + CIG(CUDA in Graphics) mode. + + - ::CU_CTX_USER_COREDUMP_ENABLE: If user-triggered GPU coredumps have not + been enabled globally with ::cuCoredumpSetAttributeGlobal or environment + variables, this flag can be set during context creation to instruct CUDA to + create a coredump if data is written to a certain pipe that is present in the + OS space. These environment variables are described in the CUDA-GDB user + guide under the "GPU core dump support" section. + It is important to note that the pipe name *must* be set with + ::cuCoredumpSetAttributeGlobal before creating the context if this flag is + used. Setting this flag implies that ::CU_CTX_COREDUMP_ENABLE is set. + The initial attributes will be taken from the global attributes at the time of + context creation. The other attributes that control coredump output can be + modified by calling ::cuCoredumpSetAttribute from the created context after + it becomes current. + Setting this flag on any context creation is equivalent to setting the + ::CU_COREDUMP_ENABLE_USER_TRIGGER attribute to \p true globally. 
+ This flag is not supported when CUDA context is created in + CIG(CUDA in Graphics) mode. + + - ::CU_CTX_SYNC_MEMOPS: Ensures that synchronous memory operations initiated + on this context will always synchronize. See further documentation in the + section titled "API Synchronization behavior" to learn more about cases when + synchronous memory operations can exhibit asynchronous behavior. + + Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of + the device is ::CU_COMPUTEMODE_PROHIBITED. The function ::cuDeviceGetAttribute() + can be used with ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the + compute mode of the device. The nvidia-smi tool can be used to set + the compute mode for devices. + Documentation for nvidia-smi can be obtained by passing a + -h option to it. + + Context creation will fail with ::CUDA_ERROR_INVALID_VALUE if an invalid parameter was + passed by the client to create the CUDA context. + + Context creation in CIG mode will fail with ::CUDA_ERROR_NOT_SUPPORTED if CIG is not supported + by the device or the driver. 
+ \param pctx - Returned context handle of the new context + \param ctxCreateParams - Context creation parameters + \param flags - Context creation flags + \param dev - Device to create context on + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_DEVICE, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_NOT_SUPPORTED, + ::CUDA_ERROR_OUT_OF_MEMORY, + ::CUDA_ERROR_UNKNOWN + \notefnerr + + \sa ::cuCtxDestroy, + ::cuCtxGetApiVersion, + ::cuCtxGetCacheConfig, + ::cuCtxGetDevice, + ::cuCtxGetFlags, + ::cuCtxGetLimit, + ::cuCtxPopCurrent, + ::cuCtxPushCurrent, + ::cuCtxSetCacheConfig, + ::cuCtxSetLimit, + ::cuCoredumpSetAttributeGlobal, + ::cuCoredumpSetAttribute, + ::cuCtxSynchronize*/ + fn cuCtxCreate_v4( + pctx: *mut cuda_types::cuda::CUcontext, + ctxCreateParams: *mut cuda_types::cuda::CUctxCreateParams, + flags: ::core::ffi::c_uint, + dev: cuda_types::cuda::CUdevice, + ) -> cuda_types::cuda::CUresult; /** \brief Destroy a CUDA context Destroys the CUDA context specified by \p ctx. The context \p ctx will be @@ -1358,9 +1530,11 @@ int textureAlign Destroys and cleans up all resources associated with the context. It is the caller's responsibility to ensure that the context or its resources are not accessed or passed in subsequent API calls and doing so will result in undefined behavior. - These resources include CUDA types such as ::CUmodule, ::CUfunction, ::CUstream, ::CUevent, + These resources include CUDA types ::CUmodule, ::CUfunction, ::CUstream, ::CUevent, ::CUarray, ::CUmipmappedArray, ::CUtexObject, ::CUsurfObject, ::CUtexref, ::CUsurfref, ::CUgraphicsResource, ::CUlinkState, ::CUexternalMemory and ::CUexternalSemaphore. + These resources also include memory allocations by ::cuMemAlloc(), ::cuMemAllocHost(), + ::cuMemAllocManaged() and ::cuMemAllocPitch(). 
If \p ctx is current to the calling thread then \p ctx will also be popped from the current thread's context stack (as though ::cuCtxPopCurrent() @@ -1368,6 +1542,10 @@ int textureAlign remain current to those threads, and attempting to access \p ctx from those threads will result in the error ::CUDA_ERROR_CONTEXT_IS_DESTROYED. + \note ::cuCtxDestroy() will not destroy memory allocations by ::cuMemCreate(), ::cuMemAllocAsync() and + ::cuMemAllocFromPoolAsync(). These memory allocations are not associated with any CUDA context and need to + be destroyed explicitly. + \param ctx - Context to destroy \return @@ -1505,11 +1683,11 @@ int textureAlign fn cuCtxGetCurrent( pctx: *mut cuda_types::cuda::CUcontext, ) -> cuda_types::cuda::CUresult; - /** \brief Returns the device ID for the current context + /** \brief Returns the device handle for the current context - Returns in \p *device the ordinal of the current context's device. + Returns in \p *device the handle of the current context's device. - \param device - Returned device ID for the current context + \param device - Returned device handle for the current context \return ::CUDA_SUCCESS, @@ -1618,9 +1796,11 @@ int textureAlign ctx: cuda_types::cuda::CUcontext, ctxId: *mut ::core::ffi::c_ulonglong, ) -> cuda_types::cuda::CUresult; - /** \brief Block for a context's tasks to complete + /** \brief Block for the current context's tasks to complete - Blocks until the device has completed all preceding requested tasks. + Blocks until the current context has completed all preceding requested tasks. + If the current context is the primary context, green contexts that have been + created will also be synchronized. ::cuCtxSynchronize() returns an error if one of the preceding tasks failed. If the context was created with the ::CU_CTX_SCHED_BLOCKING_SYNC flag, the CPU thread will block until the GPU context has finished its work. 
@@ -1995,6 +2175,80 @@ int textureAlign pExecAffinity: *mut cuda_types::cuda::CUexecAffinityParam, type_: cuda_types::cuda::CUexecAffinityType, ) -> cuda_types::cuda::CUresult; + /** \brief Records an event. + + Captures in \p hEvent all the activities of the context \p hCtx + at the time of this call. \p hEvent and \p hCtx must be from the same + CUDA context, otherwise ::CUDA_ERROR_INVALID_HANDLE will be returned. + Calls such as ::cuEventQuery() or ::cuCtxWaitEvent() will then examine + or wait for completion of the work that was captured. + Uses of \p hCtx after this call do not modify \p hEvent. + If the context passed to \p hCtx is the primary context, \p hEvent will + capture all the activities of the primary context and its green contexts. + If the context passed to \p hCtx is a context converted from green context + via ::cuCtxFromGreenCtx(), \p hEvent will capture only the activities of the green context. + + \note The API will return ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED if the + specified context \p hCtx has a stream in the capture mode. In such a case, + the call will invalidate all the conflicting captures. + + \param hCtx - Context to record event for + \param hEvent - Event to record + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED + + \sa + ::cuCtxWaitEvent, + ::cuGreenCtxRecordEvent, + ::cuGreenCtxWaitEvent, + ::cuEventRecord*/ + fn cuCtxRecordEvent( + hCtx: cuda_types::cuda::CUcontext, + hEvent: cuda_types::cuda::CUevent, + ) -> cuda_types::cuda::CUresult; + /** \brief Make a context wait on an event + + Makes all future work submitted to context \p hCtx wait for all work + captured in \p hEvent. The synchronization will be performed on the device + and will not block the calling CPU thread. See ::cuCtxRecordEvent() + for details on what is captured by an event. 
+ If the context passed to \p hCtx is the primary context, the primary context + and its green contexts will wait for \p hEvent. + If the context passed to \p hCtx is a context converted from green context + via ::cuCtxFromGreenCtx(), the green context will wait for \p hEvent. + + \note \p hEvent may be from a different context or device than \p hCtx. + + \note The API will return ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED and + invalidate the capture if the specified event \p hEvent is part of an ongoing + capture sequence or if the specified context \p hCtx has a stream in the capture mode. + + \param hCtx - Context to wait + \param hEvent - Event to wait on + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED + + \sa + ::cuCtxRecordEvent, + ::cuGreenCtxRecordEvent, + ::cuGreenCtxWaitEvent, + ::cuStreamWaitEvent*/ + fn cuCtxWaitEvent( + hCtx: cuda_types::cuda::CUcontext, + hEvent: cuda_types::cuda::CUevent, + ) -> cuda_types::cuda::CUresult; /** \brief Increment a context's usage-count \deprecated @@ -2494,6 +2748,11 @@ int textureAlign ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, and ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES will accumulate data until the CUlinkState is destroyed. + The data passed in via ::cuLinkAddData and ::cuLinkAddFile will be treated + as relocatable (-rdc=true to nvcc) when linking the final cubin during + ::cuLinkComplete and will have similar consequences as offline relocatable + device code linking. + \p optionValues must remain valid for the life of the CUlinkState if output options are used. No other references to inputs are maintained after this call returns. @@ -2739,6 +2998,7 @@ int textureAlign The \p code may be a \e cubin or \e fatbin as output by \b nvcc, or a NULL-terminated \e PTX, either as output by \b nvcc or hand-written. 
+ A fatbin should also contain relocatable code when doing separate compilation. Options are passed as an array via \p jitOptions and any corresponding parameters are passed in \p jitOptionsValues. The number of total JIT options is supplied via \p numJitOptions. @@ -2747,6 +3007,9 @@ int textureAlign Library load options are passed as an array via \p libraryOptions and any corresponding parameters are passed in \p libraryOptionValues. The number of total library load options is supplied via \p numLibraryOptions. + \note If the library contains managed variables and no device in the system + supports managed variables this call is expected to return ::CUDA_ERROR_NOT_SUPPORTED + \param library - Returned library \param code - Code to load \param jitOptions - Options for JIT @@ -2767,7 +3030,8 @@ int textureAlign ::CUDA_ERROR_NO_BINARY_FOR_GPU, ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, - ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND, + ::CUDA_ERROR_NOT_SUPPORTED \sa ::cuLibraryLoadFromFile, ::cuLibraryUnload, @@ -2800,6 +3064,7 @@ int textureAlign The file should be a \e cubin file as output by \b nvcc, or a \e PTX file either as output by \b nvcc or handwritten, or a \e fatbin file as output by \b nvcc. + A fatbin should also contain relocatable code when doing separate compilation. Options are passed as an array via \p jitOptions and any corresponding parameters are passed in \p jitOptionsValues. The number of total options is supplied via \p numJitOptions. @@ -2808,6 +3073,9 @@ int textureAlign Library load options are passed as an array via \p libraryOptions and any corresponding parameters are passed in \p libraryOptionValues. The number of total library load options is supplied via \p numLibraryOptions. 
+ \note If the library contains managed variables and no device in the system + supports managed variables this call is expected to return ::CUDA_ERROR_NOT_SUPPORTED + \param library - Returned library \param fileName - File to load from \param jitOptions - Options for JIT @@ -2828,7 +3096,8 @@ int textureAlign ::CUDA_ERROR_NO_BINARY_FOR_GPU, ::CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, ::CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, - ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND + ::CUDA_ERROR_JIT_COMPILER_NOT_FOUND, + ::CUDA_ERROR_NOT_SUPPORTED \sa ::cuLibraryLoadData, ::cuLibraryUnload, @@ -2980,6 +3249,29 @@ int textureAlign pFunc: *mut cuda_types::cuda::CUfunction, kernel: cuda_types::cuda::CUkernel, ) -> cuda_types::cuda::CUresult; + /** \brief Returns a library handle + + Returns in \p pLib the handle of the library for the requested kernel \p kernel + + \param pLib - Returned library handle + \param kernel - Kernel to retrieve library handle + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_NOT_FOUND + + \sa ::cuLibraryLoadData, + ::cuLibraryLoadFromFile, + ::cuLibraryUnload, + ::cuLibraryGetKernel*/ + fn cuKernelGetLibrary( + pLib: *mut cuda_types::cuda::CUlibrary, + kernel: cuda_types::cuda::CUkernel, + ) -> cuda_types::cuda::CUresult; /** \brief Returns a global device pointer Returns in \p *dptr and \p *bytes the base pointer and size of the global with @@ -3023,9 +3315,6 @@ int textureAlign Note that managed memory for library \p library is shared across devices and is registered when the library is loaded into atleast one context. - \note The API requires a CUDA context to be present and initialized on at least one device. - If no context is present, the call returns ::CUDA_ERROR_NOT_FOUND. 
- \param dptr - Returned pointer to the managed memory \param bytes - Returned memory size in bytes \param library - Library to retrieve managed memory from @@ -3207,6 +3496,9 @@ int textureAlign positive. The validity of the cluster dimensions is checked at launch time. If the value is set during compile time, it cannot be set at runtime. Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. + - ::CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: Indicates whether + the function can be launched with non-portable cluster size. 1 is allowed, + 0 is disallowed. - ::CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: The block scheduling policy of a function. The value type is CUclusterSchedulingPolicy. @@ -3501,9 +3793,10 @@ T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; ::cuMemAllocPitch(), ::cuMemAllocManaged(), ::cuMemAllocAsync(), ::cuMemAllocFromPoolAsync() Note - This API will not perform any implict synchronization when the pointer was allocated with - ::cuMemAllocAsync or ::cuMemAllocFromPoolAsync. Callers must ensure that all accesses to the + ::cuMemAllocAsync or ::cuMemAllocFromPoolAsync. Callers must ensure that all accesses to these pointer have completed before invoking ::cuMemFree. For best performance and memory reuse, users should use ::cuMemFreeAsync to free memory allocated via the stream ordered memory allocator. + For all other pointers, this API may perform implicit synchronization. \param dptr - Pointer to memory to free @@ -4056,7 +4349,8 @@ T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. - IPC functionality on Windows is restricted to GPUs in TCC mode + IPC functionality on Windows is supported for compatibility purposes + but not recommended as it comes with performance cost. 
Users can test their device for IPC functionality by calling ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED @@ -4099,7 +4393,8 @@ T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. - IPC functionality on Windows is restricted to GPUs in TCC mode + IPC functionality on Windows is supported for compatibility purposes + but not recommended as it comes with performance cost. Users can test their device for IPC functionality by calling ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED @@ -4144,7 +4439,8 @@ T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. - IPC functionality on Windows is restricted to GPUs in TCC mode + IPC functionality on Windows is supported for compatibility purposes + but not recommended as it comes with performance cost. Users can test their device for IPC functionality by calling ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED @@ -4199,7 +4495,8 @@ T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. - IPC functionality on Windows is restricted to GPUs in TCC mode + IPC functionality on Windows is supported for compatibility purposes + but not recommended as it comes with performance cost. Users can test their device for IPC functionality by calling ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED @@ -4245,7 +4542,8 @@ T* pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; IPC functionality is restricted to devices with support for unified addressing on Linux and Windows operating systems. 
- IPC functionality on Windows is restricted to GPUs in TCC mode + IPC functionality on Windows is supported for compatibility purposes + but not recommended as it comes with performance cost. Users can test their device for IPC functionality by calling ::cuapiDeviceGetAttribute with ::CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED @@ -5954,6 +6252,160 @@ CUdeviceptr dstStart = dstDevice+(dstZ*dstHeight+dstY)*dstPitch+dstXInBytes; pCopy: *const cuda_types::cuda::CUDA_MEMCPY3D_PEER, hStream: cuda_types::cuda::CUstream, ) -> cuda_types::cuda::CUresult; + /** \brief Performs a batch of memory copies asynchronously. + + Performs a batch of memory copies. The batch as a whole executes in stream order but copies within a + batch are not guaranteed to execute in any specific order. This API only supports pointer-to-pointer copies. + For copies involving CUDA arrays, please see ::cuMemcpy3DBatchAsync. + + Performs memory copies from source buffers specified in \p srcs to destination buffers specified in \p dsts. + The size of each copy is specified in \p sizes. All three arrays must be of the same length as specified + by \p count. Since there are no ordering guarantees for copies within a batch, specifying any dependent copies + within a batch will result in undefined behavior. + + Every copy in the batch has to be associated with a set of attributes specified in the \p attrs array. + Each entry in this array can apply to more than one copy. This can be done by specifying in the \p attrsIdxs array, + the index of the first copy that the corresponding entry in the \p attrs array applies to. Both \p attrs and + \p attrsIdxs must be of the same length as specified by \p numAttrs. For example, if a batch has 10 copies listed + in dst/src/sizes, the first 6 of which have one set of attributes and the remaining 4 another, then \p numAttrs + will be 2, \p attrsIdxs will be {0, 6} and \p attrs will contains the two sets of attributes. 
Note that the first entry + in \p attrsIdxs must always be 0. Also, each entry must be greater than the previous entry and the last entry should be + less than \p count. Furthermore, \p numAttrs must be less than or equal to \p count. + + The ::CUmemcpyAttributes::srcAccessOrder indicates the source access ordering to be observed for copies associated + with the attribute. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_STREAM, then the source will + be accessed in stream order. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_DURING_API_CALL then + it indicates that access to the source pointer can be out of stream order and all accesses must be complete before + the API call returns. This flag is suited for ephemeral sources (ex., stack variables) when it's known that no prior + operations in the stream can be accessing the memory and also that the lifetime of the memory is limited to the scope + that the source variable was declared in. Specifying this flag allows the driver to optimize the copy and removes the + need for the user to synchronize the stream after the API call. If the source access order is set to + ::CU_MEMCPY_SRC_ACCESS_ORDER_ANY then it indicates that access to the source pointer can be out of stream order and the + accesses can happen even after the API call returns. This flag is suited for host pointers allocated + outside CUDA (ex., via malloc) when it's known that no prior operations in the stream can be accessing the memory. + Specifying this flag allows the driver to optimize the copy on certain platforms. Each memcpy operation in the batch must + have a valid ::CUmemcpyAttributes corresponding to it including the appropriate srcAccessOrder setting, otherwise the API + will return ::CUDA_ERROR_INVALID_VALUE. + + The ::CUmemcpyAttributes::srcLocHint and ::CUmemcpyAttributes::dstLocHint allow applications to specify hint locations + for operands of a copy when the operand doesn't have a fixed location.
That is, these hints are + only applicable for managed memory pointers on devices where ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS is true or + system-allocated pageable memory on devices where ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS is true. + For other cases, these hints are ignored. + + The ::CUmemcpyAttributes::flags field can be used to specify certain flags for copies. Setting the + ::CU_MEMCPY_FLAG_PREFER_OVERLAP_WITH_COMPUTE flag indicates that the associated copies should preferably overlap with + any compute work. Note that this flag is a hint and can be ignored depending on the platform and other parameters of the copy. + + If any error is encountered while parsing the batch, the index within the batch where the error was encountered + will be returned in \p failIdx. + + \param dsts - Array of destination pointers. + \param srcs - Array of memcpy source pointers. + \param sizes - Array of sizes for memcpy operations. + \param count - Size of \p dsts, \p srcs and \p sizes arrays + \param attrs - Array of memcpy attributes. + \param attrsIdxs - Array of indices to specify which copies each entry in the \p attrs array applies to. +The attributes specified in attrs[k] will be applied to copies starting from attrsIdxs[k] +through attrsIdxs[k+1] - 1. Also attrs[numAttrs-1] will apply to copies starting from +attrsIdxs[numAttrs-1] through count - 1. + \param numAttrs - Size of \p attrs and \p attrsIdxs arrays. + \param failIdx - Pointer to a location to return the index of the copy where a failure was encountered. +The value will be SIZE_MAX if the error doesn't pertain to any specific copy. + \param hStream - The stream to enqueue the operations in. Must not be legacy NULL stream. 
+ + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_DEINITIALIZED + ::CUDA_ERROR_NOT_INITIALIZED + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_async + \note_memcpy*/ + fn cuMemcpyBatchAsync_ptsz( + dsts: *mut cuda_types::cuda::CUdeviceptr, + srcs: *mut cuda_types::cuda::CUdeviceptr, + sizes: *mut usize, + count: usize, + attrs: *mut cuda_types::cuda::CUmemcpyAttributes, + attrsIdxs: *mut usize, + numAttrs: usize, + failIdx: *mut usize, + hStream: cuda_types::cuda::CUstream, + ) -> cuda_types::cuda::CUresult; + /** \brief Performs a batch of 3D memory copies asynchronously. + + Performs a batch of memory copies. The batch as a whole executes in stream order but copies within a + batch are not guaranteed to execute in any specific order. Note that this means specifying any dependent + copies within a batch will result in undefined behavior. + + Performs memory copies as specified in the \p opList array. The length of this array is specified in \p numOps. + Each entry in this array describes a copy operation. This includes among other things, the source and destination + operands for the copy as specified in ::CUDA_MEMCPY3D_BATCH_OP::src and ::CUDA_MEMCPY3D_BATCH_OP::dst respectively. + The source and destination operands of a copy can either be a pointer or a CUDA array. The width, height and depth + of a copy is specified in ::CUDA_MEMCPY3D_BATCH_OP::extent. The width, height and depth of a copy are specified in + elements and must not be zero. For pointer-to-pointer copies, the element size is considered to be 1. For pointer + to CUDA array or vice versa copies, the element size is determined by the CUDA array. For CUDA array to CUDA array copies, + the element size of the two CUDA arrays must match. + + For a given operand, if ::CUmemcpy3DOperand::type is specified as ::CU_MEMCPY_OPERAND_TYPE_POINTER, then + ::CUmemcpy3DOperand::op::ptr will be used. The ::CUmemcpy3DOperand::op::ptr::ptr field must contain the pointer where + the copy should begin. 
The ::CUmemcpy3DOperand::op::ptr::rowLength field specifies the length of each row in elements and + must either be zero or be greater than or equal to the width of the copy specified in ::CUDA_MEMCPY3D_BATCH_OP::extent::width. + The ::CUmemcpy3DOperand::op::ptr::layerHeight field specifies the height of each layer and must either be zero or be greater than + or equal to the height of the copy specified in ::CUDA_MEMCPY3D_BATCH_OP::extent::height. When either of these values is zero, + that aspect of the operand is considered to be tightly packed according to the copy extent. For managed memory pointers on devices where + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS is true or system-allocated pageable memory on devices where + ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS is true, the ::CUmemcpy3DOperand::op::ptr::locHint field can be used to hint + the location of the operand. + + If an operand's type is specified as ::CU_MEMCPY_OPERAND_TYPE_ARRAY, then ::CUmemcpy3DOperand::op::array will be used. + The ::CUmemcpy3DOperand::op::array::array field specifies the CUDA array and ::CUmemcpy3DOperand::op::array::offset specifies + the 3D offset into that array where the copy begins. + + The ::CUmemcpyAttributes::srcAccessOrder indicates the source access ordering to be observed for copies associated + with the attribute. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_STREAM, then the source will + be accessed in stream order. If the source access order is set to ::CU_MEMCPY_SRC_ACCESS_ORDER_DURING_API_CALL then + it indicates that access to the source pointer can be out of stream order and all accesses must be complete before + the API call returns. This flag is suited for ephemeral sources (ex., stack variables) when it's known that no prior + operations in the stream can be accessing the memory and also that the lifetime of the memory is limited to the scope + that the source variable was declared in. 
Specifying this flag allows the driver to optimize the copy and removes the + need for the user to synchronize the stream after the API call. If the source access order is set to + ::CU_MEMCPY_SRC_ACCESS_ORDER_ANY then it indicates that access to the source pointer can be out of stream order and the + accesses can happen even after the API call returns. This flag is suited for host pointers allocated + outside CUDA (ex., via malloc) when it's known that no prior operations in the stream can be accessing the memory. + Specifying this flag allows the driver to optimize the copy on certain platforms. Each memcopy operation in \p opList must + have a valid srcAccessOrder setting, otherwise this API will return ::CUDA_ERROR_INVALID_VALUE. + + The ::CUmemcpyAttributes::flags field can be used to specify certain flags for copies. Setting the + ::CU_MEMCPY_FLAG_PREFER_OVERLAP_WITH_COMPUTE flag indicates that the associated copies should preferably overlap with + any compute work. Note that this flag is a hint and can be ignored depending on the platform and other parameters of the copy. + + If any error is encountered while parsing the batch, the index within the batch where the error was encountered + will be returned in \p failIdx. + + \param numOps - Total number of memcpy operations. + \param opList - Array of size \p numOps containing the actual memcpy operations. + \param failIdx - Pointer to a location to return the index of the copy where a failure was encountered. + The value will be SIZE_MAX if the error doesn't pertain to any specific copy. + \param flags - Flags for future use, must be zero now. + \param hStream - The stream to enqueue the operations in. Must not be default NULL stream. 
+ + \return + ::CUDA_SUCCESS + ::CUDA_ERROR_DEINITIALIZED + ::CUDA_ERROR_NOT_INITIALIZED + ::CUDA_ERROR_INVALID_VALUE + \notefnerr + \note_async + \note_memcpy*/ + fn cuMemcpy3DBatchAsync_ptsz( + numOps: usize, + opList: *mut cuda_types::cuda::CUDA_MEMCPY3D_BATCH_OP, + failIdx: *mut usize, + flags: ::core::ffi::c_ulonglong, + hStream: cuda_types::cuda::CUstream, + ) -> cuda_types::cuda::CUresult; /** \brief Initializes device memory Sets the memory range of \p N 8-bit values to the specified value @@ -6479,7 +6931,50 @@ CU_AD_FORMAT_SIGNED_INT8 = 0x08, CU_AD_FORMAT_SIGNED_INT16 = 0x09, CU_AD_FORMAT_SIGNED_INT32 = 0x0a, CU_AD_FORMAT_HALF = 0x10, -CU_AD_FORMAT_FLOAT = 0x20 +CU_AD_FORMAT_FLOAT = 0x20, +CU_AD_FORMAT_NV12 = 0xb0, +CU_AD_FORMAT_UNORM_INT8X1 = 0xc0, +CU_AD_FORMAT_UNORM_INT8X2 = 0xc1, +CU_AD_FORMAT_UNORM_INT8X4 = 0xc2, +CU_AD_FORMAT_UNORM_INT16X1 = 0xc3, +CU_AD_FORMAT_UNORM_INT16X2 = 0xc4, +CU_AD_FORMAT_UNORM_INT16X4 = 0xc5, +CU_AD_FORMAT_SNORM_INT8X1 = 0xc6, +CU_AD_FORMAT_SNORM_INT8X2 = 0xc7, +CU_AD_FORMAT_SNORM_INT8X4 = 0xc8, +CU_AD_FORMAT_SNORM_INT16X1 = 0xc9, +CU_AD_FORMAT_SNORM_INT16X2 = 0xca, +CU_AD_FORMAT_SNORM_INT16X4 = 0xcb, +CU_AD_FORMAT_BC1_UNORM = 0x91, +CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92, +CU_AD_FORMAT_BC2_UNORM = 0x93, +CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94, +CU_AD_FORMAT_BC3_UNORM = 0x95, +CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96, +CU_AD_FORMAT_BC4_UNORM = 0x97, +CU_AD_FORMAT_BC4_SNORM = 0x98, +CU_AD_FORMAT_BC5_UNORM = 0x99, +CU_AD_FORMAT_BC5_SNORM = 0x9a, +CU_AD_FORMAT_BC6H_UF16 = 0x9b, +CU_AD_FORMAT_BC6H_SF16 = 0x9c, +CU_AD_FORMAT_BC7_UNORM = 0x9d, +CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e, +CU_AD_FORMAT_P010 = 0x9f, +CU_AD_FORMAT_P016 = 0xa1, +CU_AD_FORMAT_NV16 = 0xa2, +CU_AD_FORMAT_P210 = 0xa3, +CU_AD_FORMAT_P216 = 0xa4, +CU_AD_FORMAT_YUY2 = 0xa5, +CU_AD_FORMAT_Y210 = 0xa6, +CU_AD_FORMAT_Y216 = 0xa7, +CU_AD_FORMAT_AYUV = 0xa8, +CU_AD_FORMAT_Y410 = 0xa9, +CU_AD_FORMAT_Y416 = 0xb1, +CU_AD_FORMAT_Y444_PLANAR8 = 0xb2, +CU_AD_FORMAT_Y444_PLANAR10 = 0xb3, 
+CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4, +CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5, +CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50, } CUarray_format; \endcode - \p NumChannels specifies the number of packed components per CUDA array @@ -6799,7 +7294,50 @@ CU_AD_FORMAT_SIGNED_INT8 = 0x08, CU_AD_FORMAT_SIGNED_INT16 = 0x09, CU_AD_FORMAT_SIGNED_INT32 = 0x0a, CU_AD_FORMAT_HALF = 0x10, -CU_AD_FORMAT_FLOAT = 0x20 +CU_AD_FORMAT_FLOAT = 0x20, +CU_AD_FORMAT_NV12 = 0xb0, +CU_AD_FORMAT_UNORM_INT8X1 = 0xc0, +CU_AD_FORMAT_UNORM_INT8X2 = 0xc1, +CU_AD_FORMAT_UNORM_INT8X4 = 0xc2, +CU_AD_FORMAT_UNORM_INT16X1 = 0xc3, +CU_AD_FORMAT_UNORM_INT16X2 = 0xc4, +CU_AD_FORMAT_UNORM_INT16X4 = 0xc5, +CU_AD_FORMAT_SNORM_INT8X1 = 0xc6, +CU_AD_FORMAT_SNORM_INT8X2 = 0xc7, +CU_AD_FORMAT_SNORM_INT8X4 = 0xc8, +CU_AD_FORMAT_SNORM_INT16X1 = 0xc9, +CU_AD_FORMAT_SNORM_INT16X2 = 0xca, +CU_AD_FORMAT_SNORM_INT16X4 = 0xcb, +CU_AD_FORMAT_BC1_UNORM = 0x91, +CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92, +CU_AD_FORMAT_BC2_UNORM = 0x93, +CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94, +CU_AD_FORMAT_BC3_UNORM = 0x95, +CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96, +CU_AD_FORMAT_BC4_UNORM = 0x97, +CU_AD_FORMAT_BC4_SNORM = 0x98, +CU_AD_FORMAT_BC5_UNORM = 0x99, +CU_AD_FORMAT_BC5_SNORM = 0x9a, +CU_AD_FORMAT_BC6H_UF16 = 0x9b, +CU_AD_FORMAT_BC6H_SF16 = 0x9c, +CU_AD_FORMAT_BC7_UNORM = 0x9d, +CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e, +CU_AD_FORMAT_P010 = 0x9f, +CU_AD_FORMAT_P016 = 0xa1, +CU_AD_FORMAT_NV16 = 0xa2, +CU_AD_FORMAT_P210 = 0xa3, +CU_AD_FORMAT_P216 = 0xa4, +CU_AD_FORMAT_YUY2 = 0xa5, +CU_AD_FORMAT_Y210 = 0xa6, +CU_AD_FORMAT_Y216 = 0xa7, +CU_AD_FORMAT_AYUV = 0xa8, +CU_AD_FORMAT_Y410 = 0xa9, +CU_AD_FORMAT_Y416 = 0xb1, +CU_AD_FORMAT_Y444_PLANAR8 = 0xb2, +CU_AD_FORMAT_Y444_PLANAR10 = 0xb3, +CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4, +CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5, +CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50, } CUarray_format; \endcode @@ -7020,7 +7558,50 @@ CU_AD_FORMAT_SIGNED_INT8 = 0x08, CU_AD_FORMAT_SIGNED_INT16 = 0x09, 
CU_AD_FORMAT_SIGNED_INT32 = 0x0a, CU_AD_FORMAT_HALF = 0x10, -CU_AD_FORMAT_FLOAT = 0x20 +CU_AD_FORMAT_FLOAT = 0x20, +CU_AD_FORMAT_NV12 = 0xb0, +CU_AD_FORMAT_UNORM_INT8X1 = 0xc0, +CU_AD_FORMAT_UNORM_INT8X2 = 0xc1, +CU_AD_FORMAT_UNORM_INT8X4 = 0xc2, +CU_AD_FORMAT_UNORM_INT16X1 = 0xc3, +CU_AD_FORMAT_UNORM_INT16X2 = 0xc4, +CU_AD_FORMAT_UNORM_INT16X4 = 0xc5, +CU_AD_FORMAT_SNORM_INT8X1 = 0xc6, +CU_AD_FORMAT_SNORM_INT8X2 = 0xc7, +CU_AD_FORMAT_SNORM_INT8X4 = 0xc8, +CU_AD_FORMAT_SNORM_INT16X1 = 0xc9, +CU_AD_FORMAT_SNORM_INT16X2 = 0xca, +CU_AD_FORMAT_SNORM_INT16X4 = 0xcb, +CU_AD_FORMAT_BC1_UNORM = 0x91, +CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92, +CU_AD_FORMAT_BC2_UNORM = 0x93, +CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94, +CU_AD_FORMAT_BC3_UNORM = 0x95, +CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96, +CU_AD_FORMAT_BC4_UNORM = 0x97, +CU_AD_FORMAT_BC4_SNORM = 0x98, +CU_AD_FORMAT_BC5_UNORM = 0x99, +CU_AD_FORMAT_BC5_SNORM = 0x9a, +CU_AD_FORMAT_BC6H_UF16 = 0x9b, +CU_AD_FORMAT_BC6H_SF16 = 0x9c, +CU_AD_FORMAT_BC7_UNORM = 0x9d, +CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e, +CU_AD_FORMAT_P010 = 0x9f, +CU_AD_FORMAT_P016 = 0xa1, +CU_AD_FORMAT_NV16 = 0xa2, +CU_AD_FORMAT_P210 = 0xa3, +CU_AD_FORMAT_P216 = 0xa4, +CU_AD_FORMAT_YUY2 = 0xa5, +CU_AD_FORMAT_Y210 = 0xa6, +CU_AD_FORMAT_Y216 = 0xa7, +CU_AD_FORMAT_AYUV = 0xa8, +CU_AD_FORMAT_Y410 = 0xa9, +CU_AD_FORMAT_Y416 = 0xb1, +CU_AD_FORMAT_Y444_PLANAR8 = 0xb2, +CU_AD_FORMAT_Y444_PLANAR10 = 0xb3, +CU_AD_FORMAT_YUV444_8bit_SemiPlanar = 0xb4, +CU_AD_FORMAT_YUV444_16bit_SemiPlanar = 0xb5, +CU_AD_FORMAT_UNORM_INT_101010_2 = 0x50, } CUarray_format; \endcode @@ -7184,11 +7765,17 @@ CU_AD_FORMAT_FLOAT = 0x20 new handle every time the underlying physical allocation(s) corresponding to a previously queried VA range are changed. + For CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, users may set + flags to ::CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE. Which when set on a + supported platform, will give a DMA_BUF handle mapped via PCIE BAR1 or will + return an error otherwise. 
+ \param[out] handle - Pointer to the location where the returned handle will be stored. \param[in] dptr - Pointer to a valid CUDA device allocation. Must be aligned to host page size. \param[in] size - Length of the address range. Must be aligned to host page size. \param[in] handleType - Type of handle requested (defines type and size of the \p handle output parameter) - \param[in] flags - Reserved, must be zero + \param[in] flags - When requesting CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD the value could be + ::CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE, otherwise 0. \return CUDA_SUCCESS @@ -7201,6 +7788,69 @@ CU_AD_FORMAT_FLOAT = 0x20 handleType: cuda_types::cuda::CUmemRangeHandleType, flags: ::core::ffi::c_ulonglong, ) -> cuda_types::cuda::CUresult; + /** \brief Submit a batch of \p count independent decompression operations. + + \details Each of the \p count decompression operations is described by a + single entry in the \p paramsArray array. Once the batch has been + submitted, the function will return, and decompression will happen + asynchronously w.r.t. the CPU. To the work completion tracking + mechanisms in the CUDA driver, the batch will be considered a single + unit of work and processed according to stream semantics, i.e., it + is not possible to query the completion of individual decompression + operations within a batch. + + The memory pointed to by each of ::CUmemDecompressParams.src, + ::CUmemDecompressParams.dst, and ::CUmemDecompressParams.dstActBytes, + must be capable of usage with the hardware decompress feature. That + is, for each of said pointers, the pointer attribute + ::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE should give a + non-zero value.
To ensure this, the memory backing the pointers + should have been allocated using one of the following CUDA memory + allocators: + * ::cuMemAlloc() + * ::cuMemCreate() with the usage flag ::CU_MEM_CREATE_USAGE_HW_DECOMPRESS + * ::cuMemAllocFromPoolAsync() from a pool that was created with + the usage flag ::CU_MEM_POOL_CREATE_USAGE_HW_DECOMPRESS + Additionally, ::CUmemDecompressParams.src, ::CUmemDecompressParams.dst, + and ::CUmemDecompressParams.dstActBytes, must all be accessible from + the device associated with the context where \p stream was created. + For information on how to ensure this, see the documentation for the + allocator of interest. + + \param[in] paramsArray The array of structures describing the independent + decompression operations. + \param[in] count The number of entries in \p paramsArray array. + \param[in] flags Must be 0. + \param[out] errorIndex The index into \p paramsArray of the decompression + operation to which the error returned by this + function pertains. If \p errorIndex is SIZE_MAX and + the value returned is not ::CUDA_SUCCESS, then the + error returned by this function should be considered + a general error that does not pertain to a + particular decompression operation. May be \p NULL, + in which case, no index will be recorded in the + event of error. + \param[in] stream The stream where the work will be enqueued. + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE + \notefnerr + \note_async + \note_null_stream + + \sa ::cuMemAlloc, ::cuMemPoolCreate, ::cuMemAllocFromPoolAsync*/ + fn cuMemBatchDecompressAsync_ptsz( + paramsArray: *mut cuda_types::cuda::CUmemDecompressParams, + count: usize, + flags: ::core::ffi::c_uint, + errorIndex: *mut usize, + stream: cuda_types::cuda::CUstream, + ) -> cuda_types::cuda::CUresult; /** \brief Allocate an address range reservation.
Reserves a virtual address range based on the given parameters, giving @@ -7270,17 +7920,23 @@ CU_AD_FORMAT_FLOAT = 0x20 set ::CUmemAllocationProp::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and ::CUmemAllocationProp::CUmemLocation::id must specify the NUMA ID of the CPU. On systems where NUMA is not available ::CUmemAllocationProp::CUmemLocation::id must be set to 0. + Specifying ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT or ::CU_MEM_LOCATION_TYPE_HOST as the + ::CUmemLocation::type will result in ::CUDA_ERROR_INVALID_VALUE. - Applications can set ::CUmemAllocationProp::requestedHandleTypes to - ::CU_MEM_HANDLE_TYPE_FABRIC in order to create allocations suitable for sharing - within an IMEX domain. An IMEX domain is either an OS instance or a group of securely - connected OS instances using the NVIDIA IMEX daemon. An IMEX channel is a global resource - within the IMEX domain that represents a logical entity that aims to provide fine grained - accessibility control for the participating processes. When exporter and importer CUDA processes - have been granted access to the same IMEX channel, they can securely share memory. - If the allocating process does not have access setup for an IMEX channel, attempting to create - a ::CUmemGenericAllocationHandle with ::CU_MEM_HANDLE_TYPE_FABRIC will result in ::CUDA_ERROR_NOT_PERMITTED. - The nvidia-modprobe CLI provides more information regarding setting up of IMEX channels. + Applications that intend to use ::CU_MEM_HANDLE_TYPE_FABRIC based memory sharing must ensure: + (1) `nvidia-caps-imex-channels` character device is created by the driver and is listed under /proc/devices + (2) have at least one IMEX channel file accessible by the user launching the application. + + When exporter and importer CUDA processes have been granted access to the same IMEX channel, they can securely + share memory. + + The IMEX channel security model works on a per user basis. 
Which means all processes under a user can share + memory if the user has access to a valid IMEX channel. When multi-user isolation is desired, a separate IMEX + channel is required for each user. + + These channel files exist in /dev/nvidia-caps-imex-channels/channel* and can be created using standard OS + native calls like mknod on Linux. For example: To create channel0 with the major number from /proc/devices + users can execute the following command: `mknod /dev/nvidia-caps-imex-channels/channel0 c <major number> 0` If ::CUmemAllocationProp::allocFlags::usage contains ::CU_MEM_CREATE_USAGE_TILE_POOL flag then the memory allocation is intended only to be used as backing tile pool for sparse CUDA arrays @@ -7955,22 +8611,28 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 To create a memory pool targeting a specific host NUMA node, applications must set ::CUmemPoolProps::CUmemLocation::type to ::CU_MEM_LOCATION_TYPE_HOST_NUMA and ::CUmemPoolProps::CUmemLocation::id must specify the NUMA ID of the host memory node. + Specifying ::CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT or ::CU_MEM_LOCATION_TYPE_HOST as the + ::CUmemPoolProps::CUmemLocation::type will result in ::CUDA_ERROR_INVALID_VALUE. By default, the pool's memory will be accessible from the device it is allocated on. In the case of pools created with ::CU_MEM_LOCATION_TYPE_HOST_NUMA, their default accessibility will be from the host CPU. Applications can control the maximum size of the pool by specifying a non-zero value for ::CUmemPoolProps::maxSize. If set to 0, the maximum size of the pool will default to a system dependent value. - Applications can set ::CUmemPoolProps::handleTypes to ::CU_MEM_HANDLE_TYPE_FABRIC - in order to create ::CUmemoryPool suitable for sharing within an IMEX domain. - An IMEX domain is either an OS instance or a group of securely connected OS instances - using the NVIDIA IMEX daemon.
An IMEX channel is a global resource within the IMEX domain - that represents a logical entity that aims to provide fine grained accessibility control - for the participating processes. When exporter and importer CUDA processes have been - granted access to the same IMEX channel, they can securely share memory. - If the allocating process does not have access setup for an IMEX channel, attempting to export - a ::CUmemoryPool with ::CU_MEM_HANDLE_TYPE_FABRIC will result in ::CUDA_ERROR_NOT_PERMITTED. - The nvidia-modprobe CLI provides more information regarding setting up of IMEX channels. + Applications that intend to use ::CU_MEM_HANDLE_TYPE_FABRIC based memory sharing must ensure: + (1) `nvidia-caps-imex-channels` character device is created by the driver and is listed under /proc/devices + (2) have at least one IMEX channel file accessible by the user launching the application. + + When exporter and importer CUDA processes have been granted access to the same IMEX channel, they can securely + share memory. + + The IMEX channel security model works on a per-user basis, which means all processes under a user can share + memory if the user has access to a valid IMEX channel. When multi-user isolation is desired, a separate IMEX + channel is required for each user. + + These channel files exist in /dev/nvidia-caps-imex-channels/channel* and can be created using standard OS + native calls like mknod on Linux. For example: To create channel0 with the major number from /proc/devices + users can execute the following command: `mknod /dev/nvidia-caps-imex-channels/channel0 c <major number> 0` \note Specifying CU_MEM_HANDLE_TYPE_NONE creates a memory pool that will not support IPC. @@ -8251,8 +8913,8 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 returned by ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_GRANULARITY_RECOMMENDED. - The \p size + \p memOffset must be smaller than the size of the allocated - memory.
Similarly the \p size + \p mcOffset must be smaller than the size + The \p size + \p memOffset cannot be larger than the size of the allocated + memory. Similarly the \p size + \p mcOffset cannot be larger than the size of the multicast object. The memory allocation must have beeen created on one of the devices that was added to the multicast team via ::cuMulticastAddDevice. @@ -8303,8 +8965,8 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 aligned to the value returned by ::cuMulticastGetGranularity with the flag ::CU_MULTICAST_GRANULARITY_RECOMMENDED. - The \p size must be smaller than the size of the allocated memory. - Similarly the \p size + \p mcOffset must be smaller than the total size + The \p size cannot be larger than the size of the allocated memory. + Similarly the \p size + \p mcOffset cannot be larger than the total size of the multicast object. The memory allocation must have beeen created on one of the devices that was added to the multicast team via ::cuMulticastAddDevice. @@ -8348,7 +9010,7 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 The intended \p size of the unbind and the offset in the multicast range ( \p mcOffset ) must be a multiple of the value returned by ::cuMulticastGetGranularity flag ::CU_MULTICAST_GRANULARITY_MINIMUM. - The \p size + \p mcOffset must be smaller than the total size of the + The \p size + \p mcOffset cannot be larger than the total size of the multicast object. \note @@ -8547,6 +9209,12 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 Returns in \p *data the handle to the mempool that the allocation was obtained from. + - ::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE: + + Returns in \p *data a boolean that indicates whether the pointer points + to memory that is capable to be used for hardware accelerated + decompression. + \par Note that for most allocations in the unified virtual address space @@ -8602,7 +9270,9 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 base device pointer of the memory to be prefetched and \p dstDevice is the destination device. 
\p count specifies the number of bytes to copy. \p hStream is the stream in which the operation is enqueued. The memory range must refer - to managed memory allocated via ::cuMemAllocManaged or declared via __managed__ variables. + to managed memory allocated via ::cuMemAllocManaged or declared via __managed__ variables + or it may also refer to system-allocated memory on systems with non-zero + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS. Passing in CU_DEVICE_CPU for \p dstDevice will prefetch the data to host memory. If \p dstDevice is a GPU, then the device attribute ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS @@ -9179,6 +9849,7 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 - ::CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE - ::CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES - ::CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE + - ::CU_POINTER_ATTRIBUTE_IS_HW_DECOMPRESS_CAPABLE \param numAttributes - Number of attributes to query \param attributes - An array of attributes to query @@ -9237,8 +9908,10 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 \sa ::cuStreamDestroy, ::cuStreamCreateWithPriority, + ::cuGreenCtxStreamCreate, ::cuStreamGetPriority, ::cuStreamGetFlags, + ::cuStreamGetDevice, ::cuStreamWaitEvent, ::cuStreamQuery, ::cuStreamSynchronize, @@ -9288,9 +9961,11 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 \sa ::cuStreamDestroy, ::cuStreamCreate, + ::cuGreenCtxStreamCreate, ::cuStreamGetPriority, ::cuCtxGetStreamPriorityRange, ::cuStreamGetFlags, + ::cuStreamGetDevice, ::cuStreamWaitEvent, ::cuStreamQuery, ::cuStreamSynchronize, @@ -9303,7 +9978,7 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 ) -> cuda_types::cuda::CUresult; /** \brief Query the priority of a given stream - Query the priority of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority + Query the priority of a stream created using ::cuStreamCreate, ::cuStreamCreateWithPriority or ::cuGreenCtxStreamCreate and return the priority in \p priority.
Note that if the stream was created with a priority outside the numerical range returned by ::cuCtxGetStreamPriorityRange, this function returns the clamped priority. @@ -9324,16 +9999,44 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 \sa ::cuStreamDestroy, ::cuStreamCreate, ::cuStreamCreateWithPriority, + ::cuGreenCtxStreamCreate, ::cuCtxGetStreamPriorityRange, ::cuStreamGetFlags, + ::cuStreamGetDevice, ::cudaStreamGetPriority*/ fn cuStreamGetPriority_ptsz( hStream: cuda_types::cuda::CUstream, priority: *mut ::core::ffi::c_int, ) -> cuda_types::cuda::CUresult; + /** \brief Returns the device handle of the stream + + Returns in \p *device the device handle of the stream + + \param hStream - Handle to the stream to be queried + \param device - Returns the device to which a stream belongs + + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_DEINITIALIZED, + ::CUDA_ERROR_NOT_INITIALIZED, + ::CUDA_ERROR_INVALID_CONTEXT, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_INVALID_HANDLE, + ::CUDA_ERROR_OUT_OF_MEMORY + \notefnerr + + \sa + ::cuStreamDestroy, + ::cuStreamCreate, + ::cuGreenCtxStreamCreate, + ::cuStreamGetFlags*/ + fn cuStreamGetDevice_ptsz( + hStream: cuda_types::cuda::CUstream, + device: *mut cuda_types::cuda::CUdevice, + ) -> cuda_types::cuda::CUresult; /** \brief Query the flags of a given stream - Query the flags of a stream created using ::cuStreamCreate or ::cuStreamCreateWithPriority + Query the flags of a stream created using ::cuStreamCreate, ::cuStreamCreateWithPriority or ::cuGreenCtxStreamCreate and return the flags in \p flags.
\param hStream - Handle to the stream to be queried @@ -9353,8 +10056,10 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 \sa ::cuStreamDestroy, ::cuStreamCreate, + ::cuGreenCtxStreamCreate, ::cuStreamGetPriority, - ::cudaStreamGetFlags*/ + ::cudaStreamGetFlags, + ::cuStreamGetDevice*/ fn cuStreamGetFlags_ptsz( hStream: cuda_types::cuda::CUstream, flags: *mut ::core::ffi::c_uint, @@ -9396,6 +10101,10 @@ CU_MEM_OPERATION_TYPE_UNMAP = 2 Returns the CUDA context that the stream is associated with. + Note there is a later version of this API, ::cuStreamGetCtx_v2. It will + supplant this version in CUDA 13.0. It is recommended to use ::cuStreamGetCtx_v2 + till then as this version will return ::CUDA_ERROR_NOT_SUPPORTED for streams created via the API ::cuGreenCtxStreamCreate. + The stream handle \p hStream can refer to any of the following: