From d81404eb704be596165225e35ed3de7d83a13c30 Mon Sep 17 00:00:00 2001
From: Violet <c01368481@gmail.com>
Date: Mon, 28 Jul 2025 15:07:22 -0700
Subject: [PATCH] Add support for `rocblas` to `zluda_bindgen` (#440)

One step of several for adding cublas support
---
 .github/workflows/pr_master.yml |     2 +-
 Cargo.lock                      |     7 +
 Cargo.toml                      |     1 +
 cuda_types/src/cuda.rs          |   406 +-
 cuda_types/src/nvml.rs          |   122 +-
 ext/hip_runtime-sys/src/lib.rs  |   328 +-
 ext/rocblas-sys/Cargo.toml      |    10 +
 ext/rocblas-sys/build.rs        |     9 +
 ext/rocblas-sys/src/lib.rs      | 31561 ++++++++++++++++++++++++++++++
 zluda_bindgen/src/main.rs       |    56 +-
 10 files changed, 32084 insertions(+), 418 deletions(-)
 create mode 100644 ext/rocblas-sys/Cargo.toml
 create mode 100644 ext/rocblas-sys/build.rs
 create mode 100644 ext/rocblas-sys/src/lib.rs

diff --git a/.github/workflows/pr_master.yml b/.github/workflows/pr_master.yml
index 319e167..9b50a84 100644
--- a/.github/workflows/pr_master.yml
+++ b/.github/workflows/pr_master.yml
@@ -91,7 +91,7 @@ jobs:
         tool: cargo-export
     - name: Build
       run: |
-        cargo export target/tests -- test --features ci_build --workspace --exclude cuda_macros --exclude ptx_parser_macros
+        cargo export target/tests -- test --features ci_build --workspace --exclude cuda_macros --exclude ptx_parser_macros --exclude rocblas-sys
         mkdir -p target/amdgpu
         bash .github/workflows/move_tests.sh target/tests amdgpu
         strip target/amdgpu/*
diff --git a/Cargo.lock b/Cargo.lock
index b415e20..b4d2322 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1204,6 +1204,13 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 
+[[package]]
+name = "rocblas-sys"
+version = "0.1.0"
+dependencies = [
+ "hip_runtime-sys",
+]
+
 [[package]]
 name = "rustc-hash"
 version = "1.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 1715492..5022510 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ members = [
     "detours-sys",
     "ext/amd_comgr-sys",
     "ext/hip_runtime-sys",
+    "ext/rocblas-sys",
     "format",
     "ptx",
     "ptx_parser",
diff --git a/cuda_types/src/cuda.rs b/cuda_types/src/cuda.rs
index 4cc6cd6..cfdef15 100644
--- a/cuda_types/src/cuda.rs
+++ b/cuda_types/src/cuda.rs
@@ -8454,301 +8454,301 @@ pub type cuFloatComplex = float2;
 pub type cuDoubleComplex = double2;
 pub type cuComplex = cuFloatComplex;
 impl CUerror {
-    pub const INVALID_VALUE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_VALUE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(1)
     });
-    pub const OUT_OF_MEMORY: CUerror = CUerror(unsafe {
+    pub const r#OUT_OF_MEMORY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(2)
     });
-    pub const NOT_INITIALIZED: CUerror = CUerror(unsafe {
+    pub const r#NOT_INITIALIZED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(3)
     });
-    pub const DEINITIALIZED: CUerror = CUerror(unsafe {
+    pub const r#DEINITIALIZED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(4)
     });
-    pub const PROFILER_DISABLED: CUerror = CUerror(unsafe {
+    pub const r#PROFILER_DISABLED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(5)
     });
-    pub const PROFILER_NOT_INITIALIZED: CUerror = CUerror(unsafe {
+    pub const r#PROFILER_NOT_INITIALIZED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(6)
     });
-    pub const PROFILER_ALREADY_STARTED: CUerror = CUerror(unsafe {
+    pub const r#PROFILER_ALREADY_STARTED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(7)
     });
-    pub const PROFILER_ALREADY_STOPPED: CUerror = CUerror(unsafe {
+    pub const r#PROFILER_ALREADY_STOPPED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(8)
     });
-    pub const STUB_LIBRARY: CUerror = CUerror(unsafe {
+    pub const r#STUB_LIBRARY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(34)
     });
-    pub const DEVICE_UNAVAILABLE: CUerror = CUerror(unsafe {
+    pub const r#DEVICE_UNAVAILABLE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(46)
     });
-    pub const NO_DEVICE: CUerror = CUerror(unsafe {
+    pub const r#NO_DEVICE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(100)
     });
-    pub const INVALID_DEVICE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_DEVICE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(101)
     });
-    pub const DEVICE_NOT_LICENSED: CUerror = CUerror(unsafe {
+    pub const r#DEVICE_NOT_LICENSED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(102)
     });
-    pub const INVALID_IMAGE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_IMAGE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(200)
     });
-    pub const INVALID_CONTEXT: CUerror = CUerror(unsafe {
+    pub const r#INVALID_CONTEXT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(201)
     });
-    pub const CONTEXT_ALREADY_CURRENT: CUerror = CUerror(unsafe {
+    pub const r#CONTEXT_ALREADY_CURRENT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(202)
     });
-    pub const MAP_FAILED: CUerror = CUerror(unsafe {
+    pub const r#MAP_FAILED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(205)
     });
-    pub const UNMAP_FAILED: CUerror = CUerror(unsafe {
+    pub const r#UNMAP_FAILED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(206)
     });
-    pub const ARRAY_IS_MAPPED: CUerror = CUerror(unsafe {
+    pub const r#ARRAY_IS_MAPPED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(207)
     });
-    pub const ALREADY_MAPPED: CUerror = CUerror(unsafe {
+    pub const r#ALREADY_MAPPED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(208)
     });
-    pub const NO_BINARY_FOR_GPU: CUerror = CUerror(unsafe {
+    pub const r#NO_BINARY_FOR_GPU: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(209)
     });
-    pub const ALREADY_ACQUIRED: CUerror = CUerror(unsafe {
+    pub const r#ALREADY_ACQUIRED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(210)
     });
-    pub const NOT_MAPPED: CUerror = CUerror(unsafe {
+    pub const r#NOT_MAPPED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(211)
     });
-    pub const NOT_MAPPED_AS_ARRAY: CUerror = CUerror(unsafe {
+    pub const r#NOT_MAPPED_AS_ARRAY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(212)
     });
-    pub const NOT_MAPPED_AS_POINTER: CUerror = CUerror(unsafe {
+    pub const r#NOT_MAPPED_AS_POINTER: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(213)
     });
-    pub const ECC_UNCORRECTABLE: CUerror = CUerror(unsafe {
+    pub const r#ECC_UNCORRECTABLE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(214)
     });
-    pub const UNSUPPORTED_LIMIT: CUerror = CUerror(unsafe {
+    pub const r#UNSUPPORTED_LIMIT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(215)
     });
-    pub const CONTEXT_ALREADY_IN_USE: CUerror = CUerror(unsafe {
+    pub const r#CONTEXT_ALREADY_IN_USE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(216)
     });
-    pub const PEER_ACCESS_UNSUPPORTED: CUerror = CUerror(unsafe {
+    pub const r#PEER_ACCESS_UNSUPPORTED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(217)
     });
-    pub const INVALID_PTX: CUerror = CUerror(unsafe {
+    pub const r#INVALID_PTX: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(218)
     });
-    pub const INVALID_GRAPHICS_CONTEXT: CUerror = CUerror(unsafe {
+    pub const r#INVALID_GRAPHICS_CONTEXT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(219)
     });
-    pub const NVLINK_UNCORRECTABLE: CUerror = CUerror(unsafe {
+    pub const r#NVLINK_UNCORRECTABLE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(220)
     });
-    pub const JIT_COMPILER_NOT_FOUND: CUerror = CUerror(unsafe {
+    pub const r#JIT_COMPILER_NOT_FOUND: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(221)
     });
-    pub const UNSUPPORTED_PTX_VERSION: CUerror = CUerror(unsafe {
+    pub const r#UNSUPPORTED_PTX_VERSION: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(222)
     });
-    pub const JIT_COMPILATION_DISABLED: CUerror = CUerror(unsafe {
+    pub const r#JIT_COMPILATION_DISABLED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(223)
     });
-    pub const UNSUPPORTED_EXEC_AFFINITY: CUerror = CUerror(unsafe {
+    pub const r#UNSUPPORTED_EXEC_AFFINITY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(224)
     });
-    pub const UNSUPPORTED_DEVSIDE_SYNC: CUerror = CUerror(unsafe {
+    pub const r#UNSUPPORTED_DEVSIDE_SYNC: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(225)
     });
-    pub const CONTAINED: CUerror = CUerror(unsafe {
+    pub const r#CONTAINED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(226)
     });
-    pub const INVALID_SOURCE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_SOURCE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(300)
     });
-    pub const FILE_NOT_FOUND: CUerror = CUerror(unsafe {
+    pub const r#FILE_NOT_FOUND: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(301)
     });
-    pub const SHARED_OBJECT_SYMBOL_NOT_FOUND: CUerror = CUerror(unsafe {
+    pub const r#SHARED_OBJECT_SYMBOL_NOT_FOUND: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(302)
     });
-    pub const SHARED_OBJECT_INIT_FAILED: CUerror = CUerror(unsafe {
+    pub const r#SHARED_OBJECT_INIT_FAILED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(303)
     });
-    pub const OPERATING_SYSTEM: CUerror = CUerror(unsafe {
+    pub const r#OPERATING_SYSTEM: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(304)
     });
-    pub const INVALID_HANDLE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_HANDLE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(400)
     });
-    pub const ILLEGAL_STATE: CUerror = CUerror(unsafe {
+    pub const r#ILLEGAL_STATE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(401)
     });
-    pub const LOSSY_QUERY: CUerror = CUerror(unsafe {
+    pub const r#LOSSY_QUERY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(402)
     });
-    pub const NOT_FOUND: CUerror = CUerror(unsafe {
+    pub const r#NOT_FOUND: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(500)
     });
-    pub const NOT_READY: CUerror = CUerror(unsafe {
+    pub const r#NOT_READY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(600)
     });
-    pub const ILLEGAL_ADDRESS: CUerror = CUerror(unsafe {
+    pub const r#ILLEGAL_ADDRESS: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(700)
     });
-    pub const LAUNCH_OUT_OF_RESOURCES: CUerror = CUerror(unsafe {
+    pub const r#LAUNCH_OUT_OF_RESOURCES: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(701)
     });
-    pub const LAUNCH_TIMEOUT: CUerror = CUerror(unsafe {
+    pub const r#LAUNCH_TIMEOUT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(702)
     });
-    pub const LAUNCH_INCOMPATIBLE_TEXTURING: CUerror = CUerror(unsafe {
+    pub const r#LAUNCH_INCOMPATIBLE_TEXTURING: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(703)
     });
-    pub const PEER_ACCESS_ALREADY_ENABLED: CUerror = CUerror(unsafe {
+    pub const r#PEER_ACCESS_ALREADY_ENABLED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(704)
     });
-    pub const PEER_ACCESS_NOT_ENABLED: CUerror = CUerror(unsafe {
+    pub const r#PEER_ACCESS_NOT_ENABLED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(705)
     });
-    pub const PRIMARY_CONTEXT_ACTIVE: CUerror = CUerror(unsafe {
+    pub const r#PRIMARY_CONTEXT_ACTIVE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(708)
     });
-    pub const CONTEXT_IS_DESTROYED: CUerror = CUerror(unsafe {
+    pub const r#CONTEXT_IS_DESTROYED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(709)
     });
-    pub const ASSERT: CUerror = CUerror(unsafe {
+    pub const r#ASSERT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(710)
     });
-    pub const TOO_MANY_PEERS: CUerror = CUerror(unsafe {
+    pub const r#TOO_MANY_PEERS: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(711)
     });
-    pub const HOST_MEMORY_ALREADY_REGISTERED: CUerror = CUerror(unsafe {
+    pub const r#HOST_MEMORY_ALREADY_REGISTERED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(712)
     });
-    pub const HOST_MEMORY_NOT_REGISTERED: CUerror = CUerror(unsafe {
+    pub const r#HOST_MEMORY_NOT_REGISTERED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(713)
     });
-    pub const HARDWARE_STACK_ERROR: CUerror = CUerror(unsafe {
+    pub const r#HARDWARE_STACK_ERROR: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(714)
     });
-    pub const ILLEGAL_INSTRUCTION: CUerror = CUerror(unsafe {
+    pub const r#ILLEGAL_INSTRUCTION: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(715)
     });
-    pub const MISALIGNED_ADDRESS: CUerror = CUerror(unsafe {
+    pub const r#MISALIGNED_ADDRESS: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(716)
     });
-    pub const INVALID_ADDRESS_SPACE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_ADDRESS_SPACE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(717)
     });
-    pub const INVALID_PC: CUerror = CUerror(unsafe {
+    pub const r#INVALID_PC: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(718)
     });
-    pub const LAUNCH_FAILED: CUerror = CUerror(unsafe {
+    pub const r#LAUNCH_FAILED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(719)
     });
-    pub const COOPERATIVE_LAUNCH_TOO_LARGE: CUerror = CUerror(unsafe {
+    pub const r#COOPERATIVE_LAUNCH_TOO_LARGE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(720)
     });
-    pub const TENSOR_MEMORY_LEAK: CUerror = CUerror(unsafe {
+    pub const r#TENSOR_MEMORY_LEAK: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(721)
     });
-    pub const NOT_PERMITTED: CUerror = CUerror(unsafe {
+    pub const r#NOT_PERMITTED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(800)
     });
-    pub const NOT_SUPPORTED: CUerror = CUerror(unsafe {
+    pub const r#NOT_SUPPORTED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(801)
     });
-    pub const SYSTEM_NOT_READY: CUerror = CUerror(unsafe {
+    pub const r#SYSTEM_NOT_READY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(802)
     });
-    pub const SYSTEM_DRIVER_MISMATCH: CUerror = CUerror(unsafe {
+    pub const r#SYSTEM_DRIVER_MISMATCH: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(803)
     });
-    pub const COMPAT_NOT_SUPPORTED_ON_DEVICE: CUerror = CUerror(unsafe {
+    pub const r#COMPAT_NOT_SUPPORTED_ON_DEVICE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(804)
     });
-    pub const MPS_CONNECTION_FAILED: CUerror = CUerror(unsafe {
+    pub const r#MPS_CONNECTION_FAILED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(805)
     });
-    pub const MPS_RPC_FAILURE: CUerror = CUerror(unsafe {
+    pub const r#MPS_RPC_FAILURE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(806)
     });
-    pub const MPS_SERVER_NOT_READY: CUerror = CUerror(unsafe {
+    pub const r#MPS_SERVER_NOT_READY: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(807)
     });
-    pub const MPS_MAX_CLIENTS_REACHED: CUerror = CUerror(unsafe {
+    pub const r#MPS_MAX_CLIENTS_REACHED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(808)
     });
-    pub const MPS_MAX_CONNECTIONS_REACHED: CUerror = CUerror(unsafe {
+    pub const r#MPS_MAX_CONNECTIONS_REACHED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(809)
     });
-    pub const MPS_CLIENT_TERMINATED: CUerror = CUerror(unsafe {
+    pub const r#MPS_CLIENT_TERMINATED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(810)
     });
-    pub const CDP_NOT_SUPPORTED: CUerror = CUerror(unsafe {
+    pub const r#CDP_NOT_SUPPORTED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(811)
     });
-    pub const CDP_VERSION_MISMATCH: CUerror = CUerror(unsafe {
+    pub const r#CDP_VERSION_MISMATCH: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(812)
     });
-    pub const STREAM_CAPTURE_UNSUPPORTED: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_UNSUPPORTED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(900)
     });
-    pub const STREAM_CAPTURE_INVALIDATED: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_INVALIDATED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(901)
     });
-    pub const STREAM_CAPTURE_MERGE: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_MERGE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(902)
     });
-    pub const STREAM_CAPTURE_UNMATCHED: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_UNMATCHED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(903)
     });
-    pub const STREAM_CAPTURE_UNJOINED: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_UNJOINED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(904)
     });
-    pub const STREAM_CAPTURE_ISOLATION: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_ISOLATION: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(905)
     });
-    pub const STREAM_CAPTURE_IMPLICIT: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_IMPLICIT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(906)
     });
-    pub const CAPTURED_EVENT: CUerror = CUerror(unsafe {
+    pub const r#CAPTURED_EVENT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(907)
     });
-    pub const STREAM_CAPTURE_WRONG_THREAD: CUerror = CUerror(unsafe {
+    pub const r#STREAM_CAPTURE_WRONG_THREAD: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(908)
     });
-    pub const TIMEOUT: CUerror = CUerror(unsafe {
+    pub const r#TIMEOUT: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(909)
     });
-    pub const GRAPH_EXEC_UPDATE_FAILURE: CUerror = CUerror(unsafe {
+    pub const r#GRAPH_EXEC_UPDATE_FAILURE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(910)
     });
-    pub const EXTERNAL_DEVICE: CUerror = CUerror(unsafe {
+    pub const r#EXTERNAL_DEVICE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(911)
     });
-    pub const INVALID_CLUSTER_SIZE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_CLUSTER_SIZE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(912)
     });
-    pub const FUNCTION_NOT_LOADED: CUerror = CUerror(unsafe {
+    pub const r#FUNCTION_NOT_LOADED: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(913)
     });
-    pub const INVALID_RESOURCE_TYPE: CUerror = CUerror(unsafe {
+    pub const r#INVALID_RESOURCE_TYPE: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(914)
     });
-    pub const INVALID_RESOURCE_CONFIGURATION: CUerror = CUerror(unsafe {
+    pub const r#INVALID_RESOURCE_CONFIGURATION: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(915)
     });
-    pub const KEY_ROTATION: CUerror = CUerror(unsafe {
+    pub const r#KEY_ROTATION: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(916)
     });
-    pub const UNKNOWN: CUerror = CUerror(unsafe {
+    pub const r#UNKNOWN: CUerror = CUerror(unsafe {
         ::core::num::NonZeroU32::new_unchecked(999)
     });
 }
@@ -8757,213 +8757,223 @@ impl CUerror {
 pub struct CUerror(pub ::core::num::NonZeroU32);
 pub trait CUresultConsts {
     const SUCCESS: CUresult = CUresult::Ok(());
-    const ERROR_INVALID_VALUE: CUresult = CUresult::Err(CUerror::INVALID_VALUE);
-    const ERROR_OUT_OF_MEMORY: CUresult = CUresult::Err(CUerror::OUT_OF_MEMORY);
-    const ERROR_NOT_INITIALIZED: CUresult = CUresult::Err(CUerror::NOT_INITIALIZED);
-    const ERROR_DEINITIALIZED: CUresult = CUresult::Err(CUerror::DEINITIALIZED);
-    const ERROR_PROFILER_DISABLED: CUresult = CUresult::Err(CUerror::PROFILER_DISABLED);
+    const ERROR_INVALID_VALUE: CUresult = CUresult::Err(CUerror::r#INVALID_VALUE);
+    const ERROR_OUT_OF_MEMORY: CUresult = CUresult::Err(CUerror::r#OUT_OF_MEMORY);
+    const ERROR_NOT_INITIALIZED: CUresult = CUresult::Err(CUerror::r#NOT_INITIALIZED);
+    const ERROR_DEINITIALIZED: CUresult = CUresult::Err(CUerror::r#DEINITIALIZED);
+    const ERROR_PROFILER_DISABLED: CUresult = CUresult::Err(
+        CUerror::r#PROFILER_DISABLED,
+    );
     const ERROR_PROFILER_NOT_INITIALIZED: CUresult = CUresult::Err(
-        CUerror::PROFILER_NOT_INITIALIZED,
+        CUerror::r#PROFILER_NOT_INITIALIZED,
     );
     const ERROR_PROFILER_ALREADY_STARTED: CUresult = CUresult::Err(
-        CUerror::PROFILER_ALREADY_STARTED,
+        CUerror::r#PROFILER_ALREADY_STARTED,
     );
     const ERROR_PROFILER_ALREADY_STOPPED: CUresult = CUresult::Err(
-        CUerror::PROFILER_ALREADY_STOPPED,
+        CUerror::r#PROFILER_ALREADY_STOPPED,
     );
-    const ERROR_STUB_LIBRARY: CUresult = CUresult::Err(CUerror::STUB_LIBRARY);
+    const ERROR_STUB_LIBRARY: CUresult = CUresult::Err(CUerror::r#STUB_LIBRARY);
     const ERROR_DEVICE_UNAVAILABLE: CUresult = CUresult::Err(
-        CUerror::DEVICE_UNAVAILABLE,
+        CUerror::r#DEVICE_UNAVAILABLE,
     );
-    const ERROR_NO_DEVICE: CUresult = CUresult::Err(CUerror::NO_DEVICE);
-    const ERROR_INVALID_DEVICE: CUresult = CUresult::Err(CUerror::INVALID_DEVICE);
+    const ERROR_NO_DEVICE: CUresult = CUresult::Err(CUerror::r#NO_DEVICE);
+    const ERROR_INVALID_DEVICE: CUresult = CUresult::Err(CUerror::r#INVALID_DEVICE);
     const ERROR_DEVICE_NOT_LICENSED: CUresult = CUresult::Err(
-        CUerror::DEVICE_NOT_LICENSED,
+        CUerror::r#DEVICE_NOT_LICENSED,
     );
-    const ERROR_INVALID_IMAGE: CUresult = CUresult::Err(CUerror::INVALID_IMAGE);
-    const ERROR_INVALID_CONTEXT: CUresult = CUresult::Err(CUerror::INVALID_CONTEXT);
+    const ERROR_INVALID_IMAGE: CUresult = CUresult::Err(CUerror::r#INVALID_IMAGE);
+    const ERROR_INVALID_CONTEXT: CUresult = CUresult::Err(CUerror::r#INVALID_CONTEXT);
     const ERROR_CONTEXT_ALREADY_CURRENT: CUresult = CUresult::Err(
-        CUerror::CONTEXT_ALREADY_CURRENT,
+        CUerror::r#CONTEXT_ALREADY_CURRENT,
     );
-    const ERROR_MAP_FAILED: CUresult = CUresult::Err(CUerror::MAP_FAILED);
-    const ERROR_UNMAP_FAILED: CUresult = CUresult::Err(CUerror::UNMAP_FAILED);
-    const ERROR_ARRAY_IS_MAPPED: CUresult = CUresult::Err(CUerror::ARRAY_IS_MAPPED);
-    const ERROR_ALREADY_MAPPED: CUresult = CUresult::Err(CUerror::ALREADY_MAPPED);
-    const ERROR_NO_BINARY_FOR_GPU: CUresult = CUresult::Err(CUerror::NO_BINARY_FOR_GPU);
-    const ERROR_ALREADY_ACQUIRED: CUresult = CUresult::Err(CUerror::ALREADY_ACQUIRED);
-    const ERROR_NOT_MAPPED: CUresult = CUresult::Err(CUerror::NOT_MAPPED);
+    const ERROR_MAP_FAILED: CUresult = CUresult::Err(CUerror::r#MAP_FAILED);
+    const ERROR_UNMAP_FAILED: CUresult = CUresult::Err(CUerror::r#UNMAP_FAILED);
+    const ERROR_ARRAY_IS_MAPPED: CUresult = CUresult::Err(CUerror::r#ARRAY_IS_MAPPED);
+    const ERROR_ALREADY_MAPPED: CUresult = CUresult::Err(CUerror::r#ALREADY_MAPPED);
+    const ERROR_NO_BINARY_FOR_GPU: CUresult = CUresult::Err(
+        CUerror::r#NO_BINARY_FOR_GPU,
+    );
+    const ERROR_ALREADY_ACQUIRED: CUresult = CUresult::Err(CUerror::r#ALREADY_ACQUIRED);
+    const ERROR_NOT_MAPPED: CUresult = CUresult::Err(CUerror::r#NOT_MAPPED);
     const ERROR_NOT_MAPPED_AS_ARRAY: CUresult = CUresult::Err(
-        CUerror::NOT_MAPPED_AS_ARRAY,
+        CUerror::r#NOT_MAPPED_AS_ARRAY,
     );
     const ERROR_NOT_MAPPED_AS_POINTER: CUresult = CUresult::Err(
-        CUerror::NOT_MAPPED_AS_POINTER,
+        CUerror::r#NOT_MAPPED_AS_POINTER,
+    );
+    const ERROR_ECC_UNCORRECTABLE: CUresult = CUresult::Err(
+        CUerror::r#ECC_UNCORRECTABLE,
+    );
+    const ERROR_UNSUPPORTED_LIMIT: CUresult = CUresult::Err(
+        CUerror::r#UNSUPPORTED_LIMIT,
     );
-    const ERROR_ECC_UNCORRECTABLE: CUresult = CUresult::Err(CUerror::ECC_UNCORRECTABLE);
-    const ERROR_UNSUPPORTED_LIMIT: CUresult = CUresult::Err(CUerror::UNSUPPORTED_LIMIT);
     const ERROR_CONTEXT_ALREADY_IN_USE: CUresult = CUresult::Err(
-        CUerror::CONTEXT_ALREADY_IN_USE,
+        CUerror::r#CONTEXT_ALREADY_IN_USE,
     );
     const ERROR_PEER_ACCESS_UNSUPPORTED: CUresult = CUresult::Err(
-        CUerror::PEER_ACCESS_UNSUPPORTED,
+        CUerror::r#PEER_ACCESS_UNSUPPORTED,
     );
-    const ERROR_INVALID_PTX: CUresult = CUresult::Err(CUerror::INVALID_PTX);
+    const ERROR_INVALID_PTX: CUresult = CUresult::Err(CUerror::r#INVALID_PTX);
     const ERROR_INVALID_GRAPHICS_CONTEXT: CUresult = CUresult::Err(
-        CUerror::INVALID_GRAPHICS_CONTEXT,
+        CUerror::r#INVALID_GRAPHICS_CONTEXT,
     );
     const ERROR_NVLINK_UNCORRECTABLE: CUresult = CUresult::Err(
-        CUerror::NVLINK_UNCORRECTABLE,
+        CUerror::r#NVLINK_UNCORRECTABLE,
     );
     const ERROR_JIT_COMPILER_NOT_FOUND: CUresult = CUresult::Err(
-        CUerror::JIT_COMPILER_NOT_FOUND,
+        CUerror::r#JIT_COMPILER_NOT_FOUND,
     );
     const ERROR_UNSUPPORTED_PTX_VERSION: CUresult = CUresult::Err(
-        CUerror::UNSUPPORTED_PTX_VERSION,
+        CUerror::r#UNSUPPORTED_PTX_VERSION,
     );
     const ERROR_JIT_COMPILATION_DISABLED: CUresult = CUresult::Err(
-        CUerror::JIT_COMPILATION_DISABLED,
+        CUerror::r#JIT_COMPILATION_DISABLED,
     );
     const ERROR_UNSUPPORTED_EXEC_AFFINITY: CUresult = CUresult::Err(
-        CUerror::UNSUPPORTED_EXEC_AFFINITY,
+        CUerror::r#UNSUPPORTED_EXEC_AFFINITY,
     );
     const ERROR_UNSUPPORTED_DEVSIDE_SYNC: CUresult = CUresult::Err(
-        CUerror::UNSUPPORTED_DEVSIDE_SYNC,
+        CUerror::r#UNSUPPORTED_DEVSIDE_SYNC,
     );
-    const ERROR_CONTAINED: CUresult = CUresult::Err(CUerror::CONTAINED);
-    const ERROR_INVALID_SOURCE: CUresult = CUresult::Err(CUerror::INVALID_SOURCE);
-    const ERROR_FILE_NOT_FOUND: CUresult = CUresult::Err(CUerror::FILE_NOT_FOUND);
+    const ERROR_CONTAINED: CUresult = CUresult::Err(CUerror::r#CONTAINED);
+    const ERROR_INVALID_SOURCE: CUresult = CUresult::Err(CUerror::r#INVALID_SOURCE);
+    const ERROR_FILE_NOT_FOUND: CUresult = CUresult::Err(CUerror::r#FILE_NOT_FOUND);
     const ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: CUresult = CUresult::Err(
-        CUerror::SHARED_OBJECT_SYMBOL_NOT_FOUND,
+        CUerror::r#SHARED_OBJECT_SYMBOL_NOT_FOUND,
     );
     const ERROR_SHARED_OBJECT_INIT_FAILED: CUresult = CUresult::Err(
-        CUerror::SHARED_OBJECT_INIT_FAILED,
+        CUerror::r#SHARED_OBJECT_INIT_FAILED,
     );
-    const ERROR_OPERATING_SYSTEM: CUresult = CUresult::Err(CUerror::OPERATING_SYSTEM);
-    const ERROR_INVALID_HANDLE: CUresult = CUresult::Err(CUerror::INVALID_HANDLE);
-    const ERROR_ILLEGAL_STATE: CUresult = CUresult::Err(CUerror::ILLEGAL_STATE);
-    const ERROR_LOSSY_QUERY: CUresult = CUresult::Err(CUerror::LOSSY_QUERY);
-    const ERROR_NOT_FOUND: CUresult = CUresult::Err(CUerror::NOT_FOUND);
-    const ERROR_NOT_READY: CUresult = CUresult::Err(CUerror::NOT_READY);
-    const ERROR_ILLEGAL_ADDRESS: CUresult = CUresult::Err(CUerror::ILLEGAL_ADDRESS);
+    const ERROR_OPERATING_SYSTEM: CUresult = CUresult::Err(CUerror::r#OPERATING_SYSTEM);
+    const ERROR_INVALID_HANDLE: CUresult = CUresult::Err(CUerror::r#INVALID_HANDLE);
+    const ERROR_ILLEGAL_STATE: CUresult = CUresult::Err(CUerror::r#ILLEGAL_STATE);
+    const ERROR_LOSSY_QUERY: CUresult = CUresult::Err(CUerror::r#LOSSY_QUERY);
+    const ERROR_NOT_FOUND: CUresult = CUresult::Err(CUerror::r#NOT_FOUND);
+    const ERROR_NOT_READY: CUresult = CUresult::Err(CUerror::r#NOT_READY);
+    const ERROR_ILLEGAL_ADDRESS: CUresult = CUresult::Err(CUerror::r#ILLEGAL_ADDRESS);
     const ERROR_LAUNCH_OUT_OF_RESOURCES: CUresult = CUresult::Err(
-        CUerror::LAUNCH_OUT_OF_RESOURCES,
+        CUerror::r#LAUNCH_OUT_OF_RESOURCES,
     );
-    const ERROR_LAUNCH_TIMEOUT: CUresult = CUresult::Err(CUerror::LAUNCH_TIMEOUT);
+    const ERROR_LAUNCH_TIMEOUT: CUresult = CUresult::Err(CUerror::r#LAUNCH_TIMEOUT);
     const ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: CUresult = CUresult::Err(
-        CUerror::LAUNCH_INCOMPATIBLE_TEXTURING,
+        CUerror::r#LAUNCH_INCOMPATIBLE_TEXTURING,
     );
     const ERROR_PEER_ACCESS_ALREADY_ENABLED: CUresult = CUresult::Err(
-        CUerror::PEER_ACCESS_ALREADY_ENABLED,
+        CUerror::r#PEER_ACCESS_ALREADY_ENABLED,
     );
     const ERROR_PEER_ACCESS_NOT_ENABLED: CUresult = CUresult::Err(
-        CUerror::PEER_ACCESS_NOT_ENABLED,
+        CUerror::r#PEER_ACCESS_NOT_ENABLED,
     );
     const ERROR_PRIMARY_CONTEXT_ACTIVE: CUresult = CUresult::Err(
-        CUerror::PRIMARY_CONTEXT_ACTIVE,
+        CUerror::r#PRIMARY_CONTEXT_ACTIVE,
     );
     const ERROR_CONTEXT_IS_DESTROYED: CUresult = CUresult::Err(
-        CUerror::CONTEXT_IS_DESTROYED,
+        CUerror::r#CONTEXT_IS_DESTROYED,
     );
-    const ERROR_ASSERT: CUresult = CUresult::Err(CUerror::ASSERT);
-    const ERROR_TOO_MANY_PEERS: CUresult = CUresult::Err(CUerror::TOO_MANY_PEERS);
+    const ERROR_ASSERT: CUresult = CUresult::Err(CUerror::r#ASSERT);
+    const ERROR_TOO_MANY_PEERS: CUresult = CUresult::Err(CUerror::r#TOO_MANY_PEERS);
     const ERROR_HOST_MEMORY_ALREADY_REGISTERED: CUresult = CUresult::Err(
-        CUerror::HOST_MEMORY_ALREADY_REGISTERED,
+        CUerror::r#HOST_MEMORY_ALREADY_REGISTERED,
     );
     const ERROR_HOST_MEMORY_NOT_REGISTERED: CUresult = CUresult::Err(
-        CUerror::HOST_MEMORY_NOT_REGISTERED,
+        CUerror::r#HOST_MEMORY_NOT_REGISTERED,
     );
     const ERROR_HARDWARE_STACK_ERROR: CUresult = CUresult::Err(
-        CUerror::HARDWARE_STACK_ERROR,
+        CUerror::r#HARDWARE_STACK_ERROR,
     );
     const ERROR_ILLEGAL_INSTRUCTION: CUresult = CUresult::Err(
-        CUerror::ILLEGAL_INSTRUCTION,
+        CUerror::r#ILLEGAL_INSTRUCTION,
     );
     const ERROR_MISALIGNED_ADDRESS: CUresult = CUresult::Err(
-        CUerror::MISALIGNED_ADDRESS,
+        CUerror::r#MISALIGNED_ADDRESS,
     );
     const ERROR_INVALID_ADDRESS_SPACE: CUresult = CUresult::Err(
-        CUerror::INVALID_ADDRESS_SPACE,
+        CUerror::r#INVALID_ADDRESS_SPACE,
     );
-    const ERROR_INVALID_PC: CUresult = CUresult::Err(CUerror::INVALID_PC);
-    const ERROR_LAUNCH_FAILED: CUresult = CUresult::Err(CUerror::LAUNCH_FAILED);
+    const ERROR_INVALID_PC: CUresult = CUresult::Err(CUerror::r#INVALID_PC);
+    const ERROR_LAUNCH_FAILED: CUresult = CUresult::Err(CUerror::r#LAUNCH_FAILED);
     const ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: CUresult = CUresult::Err(
-        CUerror::COOPERATIVE_LAUNCH_TOO_LARGE,
+        CUerror::r#COOPERATIVE_LAUNCH_TOO_LARGE,
     );
     const ERROR_TENSOR_MEMORY_LEAK: CUresult = CUresult::Err(
-        CUerror::TENSOR_MEMORY_LEAK,
+        CUerror::r#TENSOR_MEMORY_LEAK,
     );
-    const ERROR_NOT_PERMITTED: CUresult = CUresult::Err(CUerror::NOT_PERMITTED);
-    const ERROR_NOT_SUPPORTED: CUresult = CUresult::Err(CUerror::NOT_SUPPORTED);
-    const ERROR_SYSTEM_NOT_READY: CUresult = CUresult::Err(CUerror::SYSTEM_NOT_READY);
+    const ERROR_NOT_PERMITTED: CUresult = CUresult::Err(CUerror::r#NOT_PERMITTED);
+    const ERROR_NOT_SUPPORTED: CUresult = CUresult::Err(CUerror::r#NOT_SUPPORTED);
+    const ERROR_SYSTEM_NOT_READY: CUresult = CUresult::Err(CUerror::r#SYSTEM_NOT_READY);
     const ERROR_SYSTEM_DRIVER_MISMATCH: CUresult = CUresult::Err(
-        CUerror::SYSTEM_DRIVER_MISMATCH,
+        CUerror::r#SYSTEM_DRIVER_MISMATCH,
     );
     const ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: CUresult = CUresult::Err(
-        CUerror::COMPAT_NOT_SUPPORTED_ON_DEVICE,
+        CUerror::r#COMPAT_NOT_SUPPORTED_ON_DEVICE,
     );
     const ERROR_MPS_CONNECTION_FAILED: CUresult = CUresult::Err(
-        CUerror::MPS_CONNECTION_FAILED,
+        CUerror::r#MPS_CONNECTION_FAILED,
     );
-    const ERROR_MPS_RPC_FAILURE: CUresult = CUresult::Err(CUerror::MPS_RPC_FAILURE);
+    const ERROR_MPS_RPC_FAILURE: CUresult = CUresult::Err(CUerror::r#MPS_RPC_FAILURE);
     const ERROR_MPS_SERVER_NOT_READY: CUresult = CUresult::Err(
-        CUerror::MPS_SERVER_NOT_READY,
+        CUerror::r#MPS_SERVER_NOT_READY,
     );
     const ERROR_MPS_MAX_CLIENTS_REACHED: CUresult = CUresult::Err(
-        CUerror::MPS_MAX_CLIENTS_REACHED,
+        CUerror::r#MPS_MAX_CLIENTS_REACHED,
     );
     const ERROR_MPS_MAX_CONNECTIONS_REACHED: CUresult = CUresult::Err(
-        CUerror::MPS_MAX_CONNECTIONS_REACHED,
+        CUerror::r#MPS_MAX_CONNECTIONS_REACHED,
     );
     const ERROR_MPS_CLIENT_TERMINATED: CUresult = CUresult::Err(
-        CUerror::MPS_CLIENT_TERMINATED,
+        CUerror::r#MPS_CLIENT_TERMINATED,
+    );
+    const ERROR_CDP_NOT_SUPPORTED: CUresult = CUresult::Err(
+        CUerror::r#CDP_NOT_SUPPORTED,
     );
-    const ERROR_CDP_NOT_SUPPORTED: CUresult = CUresult::Err(CUerror::CDP_NOT_SUPPORTED);
     const ERROR_CDP_VERSION_MISMATCH: CUresult = CUresult::Err(
-        CUerror::CDP_VERSION_MISMATCH,
+        CUerror::r#CDP_VERSION_MISMATCH,
     );
     const ERROR_STREAM_CAPTURE_UNSUPPORTED: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_UNSUPPORTED,
+        CUerror::r#STREAM_CAPTURE_UNSUPPORTED,
     );
     const ERROR_STREAM_CAPTURE_INVALIDATED: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_INVALIDATED,
+        CUerror::r#STREAM_CAPTURE_INVALIDATED,
     );
     const ERROR_STREAM_CAPTURE_MERGE: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_MERGE,
+        CUerror::r#STREAM_CAPTURE_MERGE,
     );
     const ERROR_STREAM_CAPTURE_UNMATCHED: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_UNMATCHED,
+        CUerror::r#STREAM_CAPTURE_UNMATCHED,
     );
     const ERROR_STREAM_CAPTURE_UNJOINED: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_UNJOINED,
+        CUerror::r#STREAM_CAPTURE_UNJOINED,
     );
     const ERROR_STREAM_CAPTURE_ISOLATION: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_ISOLATION,
+        CUerror::r#STREAM_CAPTURE_ISOLATION,
     );
     const ERROR_STREAM_CAPTURE_IMPLICIT: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_IMPLICIT,
+        CUerror::r#STREAM_CAPTURE_IMPLICIT,
     );
-    const ERROR_CAPTURED_EVENT: CUresult = CUresult::Err(CUerror::CAPTURED_EVENT);
+    const ERROR_CAPTURED_EVENT: CUresult = CUresult::Err(CUerror::r#CAPTURED_EVENT);
     const ERROR_STREAM_CAPTURE_WRONG_THREAD: CUresult = CUresult::Err(
-        CUerror::STREAM_CAPTURE_WRONG_THREAD,
+        CUerror::r#STREAM_CAPTURE_WRONG_THREAD,
     );
-    const ERROR_TIMEOUT: CUresult = CUresult::Err(CUerror::TIMEOUT);
+    const ERROR_TIMEOUT: CUresult = CUresult::Err(CUerror::r#TIMEOUT);
     const ERROR_GRAPH_EXEC_UPDATE_FAILURE: CUresult = CUresult::Err(
-        CUerror::GRAPH_EXEC_UPDATE_FAILURE,
+        CUerror::r#GRAPH_EXEC_UPDATE_FAILURE,
     );
-    const ERROR_EXTERNAL_DEVICE: CUresult = CUresult::Err(CUerror::EXTERNAL_DEVICE);
+    const ERROR_EXTERNAL_DEVICE: CUresult = CUresult::Err(CUerror::r#EXTERNAL_DEVICE);
     const ERROR_INVALID_CLUSTER_SIZE: CUresult = CUresult::Err(
-        CUerror::INVALID_CLUSTER_SIZE,
+        CUerror::r#INVALID_CLUSTER_SIZE,
     );
     const ERROR_FUNCTION_NOT_LOADED: CUresult = CUresult::Err(
-        CUerror::FUNCTION_NOT_LOADED,
+        CUerror::r#FUNCTION_NOT_LOADED,
     );
     const ERROR_INVALID_RESOURCE_TYPE: CUresult = CUresult::Err(
-        CUerror::INVALID_RESOURCE_TYPE,
+        CUerror::r#INVALID_RESOURCE_TYPE,
     );
     const ERROR_INVALID_RESOURCE_CONFIGURATION: CUresult = CUresult::Err(
-        CUerror::INVALID_RESOURCE_CONFIGURATION,
+        CUerror::r#INVALID_RESOURCE_CONFIGURATION,
     );
-    const ERROR_KEY_ROTATION: CUresult = CUresult::Err(CUerror::KEY_ROTATION);
-    const ERROR_UNKNOWN: CUresult = CUresult::Err(CUerror::UNKNOWN);
+    const ERROR_KEY_ROTATION: CUresult = CUresult::Err(CUerror::r#KEY_ROTATION);
+    const ERROR_UNKNOWN: CUresult = CUresult::Err(CUerror::r#UNKNOWN);
 }
 impl CUresultConsts for CUresult {}
 #[must_use]
diff --git a/cuda_types/src/nvml.rs b/cuda_types/src/nvml.rs
index d5e9896..4c38f4e 100644
--- a/cuda_types/src/nvml.rs
+++ b/cuda_types/src/nvml.rs
@@ -4706,94 +4706,94 @@ pub struct nvmlPowerSmoothingState_v1_t {
 }
 pub type nvmlPowerSmoothingState_t = nvmlPowerSmoothingState_v1_t;
 impl nvmlError_t {
-    pub const UNINITIALIZED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#UNINITIALIZED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(1)
     });
-    pub const INVALID_ARGUMENT: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#INVALID_ARGUMENT: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(2)
     });
-    pub const NOT_SUPPORTED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#NOT_SUPPORTED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(3)
     });
-    pub const NO_PERMISSION: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#NO_PERMISSION: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(4)
     });
-    pub const ALREADY_INITIALIZED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#ALREADY_INITIALIZED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(5)
     });
-    pub const NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(6)
     });
-    pub const INSUFFICIENT_SIZE: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#INSUFFICIENT_SIZE: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(7)
     });
-    pub const INSUFFICIENT_POWER: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#INSUFFICIENT_POWER: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(8)
     });
-    pub const DRIVER_NOT_LOADED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#DRIVER_NOT_LOADED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(9)
     });
-    pub const TIMEOUT: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#TIMEOUT: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(10)
     });
-    pub const IRQ_ISSUE: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#IRQ_ISSUE: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(11)
     });
-    pub const LIBRARY_NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#LIBRARY_NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(12)
     });
-    pub const FUNCTION_NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#FUNCTION_NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(13)
     });
-    pub const CORRUPTED_INFOROM: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#CORRUPTED_INFOROM: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(14)
     });
-    pub const GPU_IS_LOST: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#GPU_IS_LOST: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(15)
     });
-    pub const RESET_REQUIRED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#RESET_REQUIRED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(16)
     });
-    pub const OPERATING_SYSTEM: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#OPERATING_SYSTEM: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(17)
     });
-    pub const LIB_RM_VERSION_MISMATCH: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#LIB_RM_VERSION_MISMATCH: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(18)
     });
-    pub const IN_USE: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#IN_USE: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(19)
     });
-    pub const MEMORY: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#MEMORY: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(20)
     });
-    pub const NO_DATA: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#NO_DATA: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(21)
     });
-    pub const VGPU_ECC_NOT_SUPPORTED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#VGPU_ECC_NOT_SUPPORTED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(22)
     });
-    pub const INSUFFICIENT_RESOURCES: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#INSUFFICIENT_RESOURCES: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(23)
     });
-    pub const FREQ_NOT_SUPPORTED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#FREQ_NOT_SUPPORTED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(24)
     });
-    pub const ARGUMENT_VERSION_MISMATCH: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#ARGUMENT_VERSION_MISMATCH: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(25)
     });
-    pub const DEPRECATED: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#DEPRECATED: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(26)
     });
-    pub const NOT_READY: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#NOT_READY: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(27)
     });
-    pub const GPU_NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#GPU_NOT_FOUND: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(28)
     });
-    pub const INVALID_STATE: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#INVALID_STATE: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(29)
     });
-    pub const UNKNOWN: nvmlError_t = nvmlError_t(unsafe {
+    pub const r#UNKNOWN: nvmlError_t = nvmlError_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(999)
     });
 }
@@ -4803,75 +4803,77 @@ pub struct nvmlError_t(pub ::core::num::NonZeroU32);
 pub trait nvmlReturn_tConsts {
     const SUCCESS: nvmlReturn_t = nvmlReturn_t::Ok(());
     const ERROR_UNINITIALIZED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::UNINITIALIZED,
+        nvmlError_t::r#UNINITIALIZED,
     );
     const ERROR_INVALID_ARGUMENT: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::INVALID_ARGUMENT,
+        nvmlError_t::r#INVALID_ARGUMENT,
     );
     const ERROR_NOT_SUPPORTED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::NOT_SUPPORTED,
+        nvmlError_t::r#NOT_SUPPORTED,
     );
     const ERROR_NO_PERMISSION: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::NO_PERMISSION,
+        nvmlError_t::r#NO_PERMISSION,
     );
     const ERROR_ALREADY_INITIALIZED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::ALREADY_INITIALIZED,
+        nvmlError_t::r#ALREADY_INITIALIZED,
     );
-    const ERROR_NOT_FOUND: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::NOT_FOUND);
+    const ERROR_NOT_FOUND: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#NOT_FOUND);
     const ERROR_INSUFFICIENT_SIZE: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::INSUFFICIENT_SIZE,
+        nvmlError_t::r#INSUFFICIENT_SIZE,
     );
     const ERROR_INSUFFICIENT_POWER: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::INSUFFICIENT_POWER,
+        nvmlError_t::r#INSUFFICIENT_POWER,
     );
     const ERROR_DRIVER_NOT_LOADED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::DRIVER_NOT_LOADED,
+        nvmlError_t::r#DRIVER_NOT_LOADED,
     );
-    const ERROR_TIMEOUT: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::TIMEOUT);
-    const ERROR_IRQ_ISSUE: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::IRQ_ISSUE);
+    const ERROR_TIMEOUT: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#TIMEOUT);
+    const ERROR_IRQ_ISSUE: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#IRQ_ISSUE);
     const ERROR_LIBRARY_NOT_FOUND: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::LIBRARY_NOT_FOUND,
+        nvmlError_t::r#LIBRARY_NOT_FOUND,
     );
     const ERROR_FUNCTION_NOT_FOUND: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::FUNCTION_NOT_FOUND,
+        nvmlError_t::r#FUNCTION_NOT_FOUND,
     );
     const ERROR_CORRUPTED_INFOROM: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::CORRUPTED_INFOROM,
+        nvmlError_t::r#CORRUPTED_INFOROM,
+    );
+    const ERROR_GPU_IS_LOST: nvmlReturn_t = nvmlReturn_t::Err(
+        nvmlError_t::r#GPU_IS_LOST,
     );
-    const ERROR_GPU_IS_LOST: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::GPU_IS_LOST);
     const ERROR_RESET_REQUIRED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::RESET_REQUIRED,
+        nvmlError_t::r#RESET_REQUIRED,
     );
     const ERROR_OPERATING_SYSTEM: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::OPERATING_SYSTEM,
+        nvmlError_t::r#OPERATING_SYSTEM,
     );
     const ERROR_LIB_RM_VERSION_MISMATCH: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::LIB_RM_VERSION_MISMATCH,
+        nvmlError_t::r#LIB_RM_VERSION_MISMATCH,
     );
-    const ERROR_IN_USE: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::IN_USE);
-    const ERROR_MEMORY: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::MEMORY);
-    const ERROR_NO_DATA: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::NO_DATA);
+    const ERROR_IN_USE: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#IN_USE);
+    const ERROR_MEMORY: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#MEMORY);
+    const ERROR_NO_DATA: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#NO_DATA);
     const ERROR_VGPU_ECC_NOT_SUPPORTED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::VGPU_ECC_NOT_SUPPORTED,
+        nvmlError_t::r#VGPU_ECC_NOT_SUPPORTED,
     );
     const ERROR_INSUFFICIENT_RESOURCES: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::INSUFFICIENT_RESOURCES,
+        nvmlError_t::r#INSUFFICIENT_RESOURCES,
     );
     const ERROR_FREQ_NOT_SUPPORTED: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::FREQ_NOT_SUPPORTED,
+        nvmlError_t::r#FREQ_NOT_SUPPORTED,
     );
     const ERROR_ARGUMENT_VERSION_MISMATCH: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::ARGUMENT_VERSION_MISMATCH,
+        nvmlError_t::r#ARGUMENT_VERSION_MISMATCH,
     );
-    const ERROR_DEPRECATED: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::DEPRECATED);
-    const ERROR_NOT_READY: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::NOT_READY);
+    const ERROR_DEPRECATED: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#DEPRECATED);
+    const ERROR_NOT_READY: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#NOT_READY);
     const ERROR_GPU_NOT_FOUND: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::GPU_NOT_FOUND,
+        nvmlError_t::r#GPU_NOT_FOUND,
     );
     const ERROR_INVALID_STATE: nvmlReturn_t = nvmlReturn_t::Err(
-        nvmlError_t::INVALID_STATE,
+        nvmlError_t::r#INVALID_STATE,
     );
-    const ERROR_UNKNOWN: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::UNKNOWN);
+    const ERROR_UNKNOWN: nvmlReturn_t = nvmlReturn_t::Err(nvmlError_t::r#UNKNOWN);
 }
 impl nvmlReturn_tConsts for nvmlReturn_t {}
 #[must_use]
diff --git a/ext/hip_runtime-sys/src/lib.rs b/ext/hip_runtime-sys/src/lib.rs
index 9ce281e..0110292 100644
--- a/ext/hip_runtime-sys/src/lib.rs
+++ b/ext/hip_runtime-sys/src/lib.rs
@@ -13215,241 +13215,241 @@ extern "C" {
     ) -> hipError_t;
 }
 impl hipErrorCode_t {
-    pub const InvalidValue: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidValue: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(1)
     });
-    pub const OutOfMemory: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#OutOfMemory: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(2)
     });
-    pub const MemoryAllocation: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#MemoryAllocation: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(2)
     });
-    pub const NotInitialized: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotInitialized: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(3)
     });
-    pub const InitializationError: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InitializationError: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(3)
     });
-    pub const Deinitialized: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#Deinitialized: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(4)
     });
-    pub const ProfilerDisabled: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ProfilerDisabled: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(5)
     });
-    pub const ProfilerNotInitialized: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ProfilerNotInitialized: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(6)
     });
-    pub const ProfilerAlreadyStarted: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ProfilerAlreadyStarted: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(7)
     });
-    pub const ProfilerAlreadyStopped: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ProfilerAlreadyStopped: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(8)
     });
-    pub const InvalidConfiguration: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidConfiguration: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(9)
     });
-    pub const InvalidPitchValue: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidPitchValue: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(12)
     });
-    pub const InvalidSymbol: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidSymbol: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(13)
     });
-    pub const InvalidDevicePointer: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidDevicePointer: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(17)
     });
-    pub const InvalidMemcpyDirection: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidMemcpyDirection: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(21)
     });
-    pub const InsufficientDriver: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InsufficientDriver: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(35)
     });
-    pub const MissingConfiguration: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#MissingConfiguration: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(52)
     });
-    pub const PriorLaunchFailure: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#PriorLaunchFailure: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(53)
     });
-    pub const InvalidDeviceFunction: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidDeviceFunction: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(98)
     });
-    pub const NoDevice: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NoDevice: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(100)
     });
-    pub const InvalidDevice: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidDevice: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(101)
     });
-    pub const InvalidImage: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidImage: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(200)
     });
-    pub const InvalidContext: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidContext: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(201)
     });
-    pub const ContextAlreadyCurrent: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ContextAlreadyCurrent: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(202)
     });
-    pub const MapFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#MapFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(205)
     });
-    pub const MapBufferObjectFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#MapBufferObjectFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(205)
     });
-    pub const UnmapFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#UnmapFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(206)
     });
-    pub const ArrayIsMapped: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ArrayIsMapped: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(207)
     });
-    pub const AlreadyMapped: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#AlreadyMapped: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(208)
     });
-    pub const NoBinaryForGpu: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NoBinaryForGpu: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(209)
     });
-    pub const AlreadyAcquired: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#AlreadyAcquired: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(210)
     });
-    pub const NotMapped: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotMapped: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(211)
     });
-    pub const NotMappedAsArray: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotMappedAsArray: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(212)
     });
-    pub const NotMappedAsPointer: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotMappedAsPointer: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(213)
     });
-    pub const ECCNotCorrectable: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ECCNotCorrectable: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(214)
     });
-    pub const UnsupportedLimit: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#UnsupportedLimit: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(215)
     });
-    pub const ContextAlreadyInUse: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ContextAlreadyInUse: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(216)
     });
-    pub const PeerAccessUnsupported: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#PeerAccessUnsupported: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(217)
     });
-    pub const InvalidKernelFile: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidKernelFile: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(218)
     });
-    pub const InvalidGraphicsContext: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidGraphicsContext: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(219)
     });
-    pub const InvalidSource: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidSource: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(300)
     });
-    pub const FileNotFound: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#FileNotFound: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(301)
     });
-    pub const SharedObjectSymbolNotFound: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#SharedObjectSymbolNotFound: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(302)
     });
-    pub const SharedObjectInitFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#SharedObjectInitFailed: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(303)
     });
-    pub const OperatingSystem: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#OperatingSystem: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(304)
     });
-    pub const InvalidHandle: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidHandle: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(400)
     });
-    pub const InvalidResourceHandle: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidResourceHandle: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(400)
     });
-    pub const IllegalState: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#IllegalState: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(401)
     });
-    pub const NotFound: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotFound: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(500)
     });
-    pub const NotReady: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotReady: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(600)
     });
-    pub const IllegalAddress: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#IllegalAddress: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(700)
     });
-    pub const LaunchOutOfResources: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#LaunchOutOfResources: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(701)
     });
-    pub const LaunchTimeOut: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#LaunchTimeOut: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(702)
     });
-    pub const PeerAccessAlreadyEnabled: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#PeerAccessAlreadyEnabled: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(704)
     });
-    pub const PeerAccessNotEnabled: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#PeerAccessNotEnabled: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(705)
     });
-    pub const SetOnActiveProcess: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#SetOnActiveProcess: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(708)
     });
-    pub const ContextIsDestroyed: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#ContextIsDestroyed: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(709)
     });
-    pub const Assert: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#Assert: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(710)
     });
-    pub const HostMemoryAlreadyRegistered: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#HostMemoryAlreadyRegistered: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(712)
     });
-    pub const HostMemoryNotRegistered: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#HostMemoryNotRegistered: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(713)
     });
-    pub const LaunchFailure: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#LaunchFailure: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(719)
     });
-    pub const CooperativeLaunchTooLarge: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#CooperativeLaunchTooLarge: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(720)
     });
-    pub const NotSupported: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#NotSupported: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(801)
     });
-    pub const StreamCaptureUnsupported: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureUnsupported: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(900)
     });
-    pub const StreamCaptureInvalidated: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureInvalidated: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(901)
     });
-    pub const StreamCaptureMerge: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureMerge: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(902)
     });
-    pub const StreamCaptureUnmatched: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureUnmatched: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(903)
     });
-    pub const StreamCaptureUnjoined: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureUnjoined: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(904)
     });
-    pub const StreamCaptureIsolation: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureIsolation: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(905)
     });
-    pub const StreamCaptureImplicit: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureImplicit: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(906)
     });
-    pub const CapturedEvent: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#CapturedEvent: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(907)
     });
-    pub const StreamCaptureWrongThread: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#StreamCaptureWrongThread: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(908)
     });
-    pub const GraphExecUpdateFailure: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#GraphExecUpdateFailure: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(910)
     });
-    pub const InvalidChannelDescriptor: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidChannelDescriptor: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(911)
     });
-    pub const InvalidTexture: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#InvalidTexture: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(912)
     });
-    pub const Unknown: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#Unknown: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(999)
     });
-    pub const RuntimeMemory: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#RuntimeMemory: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(1052)
     });
-    pub const RuntimeOther: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#RuntimeOther: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(1053)
     });
-    pub const Tbd: hipErrorCode_t = hipErrorCode_t(unsafe {
+    pub const r#Tbd: hipErrorCode_t = hipErrorCode_t(unsafe {
         ::core::num::NonZeroU32::new_unchecked(1054)
     });
 }
@@ -13458,211 +13458,223 @@ impl hipErrorCode_t {
 pub struct hipErrorCode_t(pub ::core::num::NonZeroU32);
 pub trait hipError_tConsts {
     const Success: hipError_t = hipError_t::Ok(());
-    const ErrorInvalidValue: hipError_t = hipError_t::Err(hipErrorCode_t::InvalidValue);
-    const ErrorOutOfMemory: hipError_t = hipError_t::Err(hipErrorCode_t::OutOfMemory);
+    const ErrorInvalidValue: hipError_t = hipError_t::Err(
+        hipErrorCode_t::r#InvalidValue,
+    );
+    const ErrorOutOfMemory: hipError_t = hipError_t::Err(hipErrorCode_t::r#OutOfMemory);
     const ErrorMemoryAllocation: hipError_t = hipError_t::Err(
-        hipErrorCode_t::MemoryAllocation,
+        hipErrorCode_t::r#MemoryAllocation,
     );
     const ErrorNotInitialized: hipError_t = hipError_t::Err(
-        hipErrorCode_t::NotInitialized,
+        hipErrorCode_t::r#NotInitialized,
     );
     const ErrorInitializationError: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InitializationError,
+        hipErrorCode_t::r#InitializationError,
     );
     const ErrorDeinitialized: hipError_t = hipError_t::Err(
-        hipErrorCode_t::Deinitialized,
+        hipErrorCode_t::r#Deinitialized,
     );
     const ErrorProfilerDisabled: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ProfilerDisabled,
+        hipErrorCode_t::r#ProfilerDisabled,
     );
     const ErrorProfilerNotInitialized: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ProfilerNotInitialized,
+        hipErrorCode_t::r#ProfilerNotInitialized,
     );
     const ErrorProfilerAlreadyStarted: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ProfilerAlreadyStarted,
+        hipErrorCode_t::r#ProfilerAlreadyStarted,
     );
     const ErrorProfilerAlreadyStopped: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ProfilerAlreadyStopped,
+        hipErrorCode_t::r#ProfilerAlreadyStopped,
     );
     const ErrorInvalidConfiguration: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidConfiguration,
+        hipErrorCode_t::r#InvalidConfiguration,
     );
     const ErrorInvalidPitchValue: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidPitchValue,
+        hipErrorCode_t::r#InvalidPitchValue,
     );
     const ErrorInvalidSymbol: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidSymbol,
+        hipErrorCode_t::r#InvalidSymbol,
     );
     const ErrorInvalidDevicePointer: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidDevicePointer,
+        hipErrorCode_t::r#InvalidDevicePointer,
     );
     const ErrorInvalidMemcpyDirection: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidMemcpyDirection,
+        hipErrorCode_t::r#InvalidMemcpyDirection,
     );
     const ErrorInsufficientDriver: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InsufficientDriver,
+        hipErrorCode_t::r#InsufficientDriver,
     );
     const ErrorMissingConfiguration: hipError_t = hipError_t::Err(
-        hipErrorCode_t::MissingConfiguration,
+        hipErrorCode_t::r#MissingConfiguration,
     );
     const ErrorPriorLaunchFailure: hipError_t = hipError_t::Err(
-        hipErrorCode_t::PriorLaunchFailure,
+        hipErrorCode_t::r#PriorLaunchFailure,
     );
     const ErrorInvalidDeviceFunction: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidDeviceFunction,
+        hipErrorCode_t::r#InvalidDeviceFunction,
     );
-    const ErrorNoDevice: hipError_t = hipError_t::Err(hipErrorCode_t::NoDevice);
+    const ErrorNoDevice: hipError_t = hipError_t::Err(hipErrorCode_t::r#NoDevice);
     const ErrorInvalidDevice: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidDevice,
+        hipErrorCode_t::r#InvalidDevice,
+    );
+    const ErrorInvalidImage: hipError_t = hipError_t::Err(
+        hipErrorCode_t::r#InvalidImage,
     );
-    const ErrorInvalidImage: hipError_t = hipError_t::Err(hipErrorCode_t::InvalidImage);
     const ErrorInvalidContext: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidContext,
+        hipErrorCode_t::r#InvalidContext,
     );
     const ErrorContextAlreadyCurrent: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ContextAlreadyCurrent,
+        hipErrorCode_t::r#ContextAlreadyCurrent,
     );
-    const ErrorMapFailed: hipError_t = hipError_t::Err(hipErrorCode_t::MapFailed);
+    const ErrorMapFailed: hipError_t = hipError_t::Err(hipErrorCode_t::r#MapFailed);
     const ErrorMapBufferObjectFailed: hipError_t = hipError_t::Err(
-        hipErrorCode_t::MapBufferObjectFailed,
+        hipErrorCode_t::r#MapBufferObjectFailed,
     );
-    const ErrorUnmapFailed: hipError_t = hipError_t::Err(hipErrorCode_t::UnmapFailed);
+    const ErrorUnmapFailed: hipError_t = hipError_t::Err(hipErrorCode_t::r#UnmapFailed);
     const ErrorArrayIsMapped: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ArrayIsMapped,
+        hipErrorCode_t::r#ArrayIsMapped,
     );
     const ErrorAlreadyMapped: hipError_t = hipError_t::Err(
-        hipErrorCode_t::AlreadyMapped,
+        hipErrorCode_t::r#AlreadyMapped,
     );
     const ErrorNoBinaryForGpu: hipError_t = hipError_t::Err(
-        hipErrorCode_t::NoBinaryForGpu,
+        hipErrorCode_t::r#NoBinaryForGpu,
     );
     const ErrorAlreadyAcquired: hipError_t = hipError_t::Err(
-        hipErrorCode_t::AlreadyAcquired,
+        hipErrorCode_t::r#AlreadyAcquired,
     );
-    const ErrorNotMapped: hipError_t = hipError_t::Err(hipErrorCode_t::NotMapped);
+    const ErrorNotMapped: hipError_t = hipError_t::Err(hipErrorCode_t::r#NotMapped);
     const ErrorNotMappedAsArray: hipError_t = hipError_t::Err(
-        hipErrorCode_t::NotMappedAsArray,
+        hipErrorCode_t::r#NotMappedAsArray,
     );
     const ErrorNotMappedAsPointer: hipError_t = hipError_t::Err(
-        hipErrorCode_t::NotMappedAsPointer,
+        hipErrorCode_t::r#NotMappedAsPointer,
     );
     const ErrorECCNotCorrectable: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ECCNotCorrectable,
+        hipErrorCode_t::r#ECCNotCorrectable,
     );
     const ErrorUnsupportedLimit: hipError_t = hipError_t::Err(
-        hipErrorCode_t::UnsupportedLimit,
+        hipErrorCode_t::r#UnsupportedLimit,
     );
     const ErrorContextAlreadyInUse: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ContextAlreadyInUse,
+        hipErrorCode_t::r#ContextAlreadyInUse,
     );
     const ErrorPeerAccessUnsupported: hipError_t = hipError_t::Err(
-        hipErrorCode_t::PeerAccessUnsupported,
+        hipErrorCode_t::r#PeerAccessUnsupported,
     );
     const ErrorInvalidKernelFile: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidKernelFile,
+        hipErrorCode_t::r#InvalidKernelFile,
     );
     const ErrorInvalidGraphicsContext: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidGraphicsContext,
+        hipErrorCode_t::r#InvalidGraphicsContext,
     );
     const ErrorInvalidSource: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidSource,
+        hipErrorCode_t::r#InvalidSource,
+    );
+    const ErrorFileNotFound: hipError_t = hipError_t::Err(
+        hipErrorCode_t::r#FileNotFound,
     );
-    const ErrorFileNotFound: hipError_t = hipError_t::Err(hipErrorCode_t::FileNotFound);
     const ErrorSharedObjectSymbolNotFound: hipError_t = hipError_t::Err(
-        hipErrorCode_t::SharedObjectSymbolNotFound,
+        hipErrorCode_t::r#SharedObjectSymbolNotFound,
     );
     const ErrorSharedObjectInitFailed: hipError_t = hipError_t::Err(
-        hipErrorCode_t::SharedObjectInitFailed,
+        hipErrorCode_t::r#SharedObjectInitFailed,
     );
     const ErrorOperatingSystem: hipError_t = hipError_t::Err(
-        hipErrorCode_t::OperatingSystem,
+        hipErrorCode_t::r#OperatingSystem,
     );
     const ErrorInvalidHandle: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidHandle,
+        hipErrorCode_t::r#InvalidHandle,
     );
     const ErrorInvalidResourceHandle: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidResourceHandle,
+        hipErrorCode_t::r#InvalidResourceHandle,
     );
-    const ErrorIllegalState: hipError_t = hipError_t::Err(hipErrorCode_t::IllegalState);
-    const ErrorNotFound: hipError_t = hipError_t::Err(hipErrorCode_t::NotFound);
-    const ErrorNotReady: hipError_t = hipError_t::Err(hipErrorCode_t::NotReady);
+    const ErrorIllegalState: hipError_t = hipError_t::Err(
+        hipErrorCode_t::r#IllegalState,
+    );
+    const ErrorNotFound: hipError_t = hipError_t::Err(hipErrorCode_t::r#NotFound);
+    const ErrorNotReady: hipError_t = hipError_t::Err(hipErrorCode_t::r#NotReady);
     const ErrorIllegalAddress: hipError_t = hipError_t::Err(
-        hipErrorCode_t::IllegalAddress,
+        hipErrorCode_t::r#IllegalAddress,
     );
     const ErrorLaunchOutOfResources: hipError_t = hipError_t::Err(
-        hipErrorCode_t::LaunchOutOfResources,
+        hipErrorCode_t::r#LaunchOutOfResources,
     );
     const ErrorLaunchTimeOut: hipError_t = hipError_t::Err(
-        hipErrorCode_t::LaunchTimeOut,
+        hipErrorCode_t::r#LaunchTimeOut,
     );
     const ErrorPeerAccessAlreadyEnabled: hipError_t = hipError_t::Err(
-        hipErrorCode_t::PeerAccessAlreadyEnabled,
+        hipErrorCode_t::r#PeerAccessAlreadyEnabled,
     );
     const ErrorPeerAccessNotEnabled: hipError_t = hipError_t::Err(
-        hipErrorCode_t::PeerAccessNotEnabled,
+        hipErrorCode_t::r#PeerAccessNotEnabled,
     );
     const ErrorSetOnActiveProcess: hipError_t = hipError_t::Err(
-        hipErrorCode_t::SetOnActiveProcess,
+        hipErrorCode_t::r#SetOnActiveProcess,
     );
     const ErrorContextIsDestroyed: hipError_t = hipError_t::Err(
-        hipErrorCode_t::ContextIsDestroyed,
+        hipErrorCode_t::r#ContextIsDestroyed,
     );
-    const ErrorAssert: hipError_t = hipError_t::Err(hipErrorCode_t::Assert);
+    const ErrorAssert: hipError_t = hipError_t::Err(hipErrorCode_t::r#Assert);
     const ErrorHostMemoryAlreadyRegistered: hipError_t = hipError_t::Err(
-        hipErrorCode_t::HostMemoryAlreadyRegistered,
+        hipErrorCode_t::r#HostMemoryAlreadyRegistered,
     );
     const ErrorHostMemoryNotRegistered: hipError_t = hipError_t::Err(
-        hipErrorCode_t::HostMemoryNotRegistered,
+        hipErrorCode_t::r#HostMemoryNotRegistered,
     );
     const ErrorLaunchFailure: hipError_t = hipError_t::Err(
-        hipErrorCode_t::LaunchFailure,
+        hipErrorCode_t::r#LaunchFailure,
     );
     const ErrorCooperativeLaunchTooLarge: hipError_t = hipError_t::Err(
-        hipErrorCode_t::CooperativeLaunchTooLarge,
+        hipErrorCode_t::r#CooperativeLaunchTooLarge,
+    );
+    const ErrorNotSupported: hipError_t = hipError_t::Err(
+        hipErrorCode_t::r#NotSupported,
     );
-    const ErrorNotSupported: hipError_t = hipError_t::Err(hipErrorCode_t::NotSupported);
     const ErrorStreamCaptureUnsupported: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureUnsupported,
+        hipErrorCode_t::r#StreamCaptureUnsupported,
     );
     const ErrorStreamCaptureInvalidated: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureInvalidated,
+        hipErrorCode_t::r#StreamCaptureInvalidated,
     );
     const ErrorStreamCaptureMerge: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureMerge,
+        hipErrorCode_t::r#StreamCaptureMerge,
     );
     const ErrorStreamCaptureUnmatched: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureUnmatched,
+        hipErrorCode_t::r#StreamCaptureUnmatched,
     );
     const ErrorStreamCaptureUnjoined: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureUnjoined,
+        hipErrorCode_t::r#StreamCaptureUnjoined,
     );
     const ErrorStreamCaptureIsolation: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureIsolation,
+        hipErrorCode_t::r#StreamCaptureIsolation,
     );
     const ErrorStreamCaptureImplicit: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureImplicit,
+        hipErrorCode_t::r#StreamCaptureImplicit,
     );
     const ErrorCapturedEvent: hipError_t = hipError_t::Err(
-        hipErrorCode_t::CapturedEvent,
+        hipErrorCode_t::r#CapturedEvent,
     );
     const ErrorStreamCaptureWrongThread: hipError_t = hipError_t::Err(
-        hipErrorCode_t::StreamCaptureWrongThread,
+        hipErrorCode_t::r#StreamCaptureWrongThread,
     );
     const ErrorGraphExecUpdateFailure: hipError_t = hipError_t::Err(
-        hipErrorCode_t::GraphExecUpdateFailure,
+        hipErrorCode_t::r#GraphExecUpdateFailure,
     );
     const ErrorInvalidChannelDescriptor: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidChannelDescriptor,
+        hipErrorCode_t::r#InvalidChannelDescriptor,
     );
     const ErrorInvalidTexture: hipError_t = hipError_t::Err(
-        hipErrorCode_t::InvalidTexture,
+        hipErrorCode_t::r#InvalidTexture,
     );
-    const ErrorUnknown: hipError_t = hipError_t::Err(hipErrorCode_t::Unknown);
+    const ErrorUnknown: hipError_t = hipError_t::Err(hipErrorCode_t::r#Unknown);
     const ErrorRuntimeMemory: hipError_t = hipError_t::Err(
-        hipErrorCode_t::RuntimeMemory,
+        hipErrorCode_t::r#RuntimeMemory,
     );
-    const ErrorRuntimeOther: hipError_t = hipError_t::Err(hipErrorCode_t::RuntimeOther);
-    const ErrorTbd: hipError_t = hipError_t::Err(hipErrorCode_t::Tbd);
+    const ErrorRuntimeOther: hipError_t = hipError_t::Err(
+        hipErrorCode_t::r#RuntimeOther,
+    );
+    const ErrorTbd: hipError_t = hipError_t::Err(hipErrorCode_t::r#Tbd);
 }
 impl hipError_tConsts for hipError_t {}
 #[must_use]
diff --git a/ext/rocblas-sys/Cargo.toml b/ext/rocblas-sys/Cargo.toml
new file mode 100644
index 0000000..b3ac79a
--- /dev/null
+++ b/ext/rocblas-sys/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "rocblas-sys"
+version = "0.1.0"
+authors = ["Violet <c01368481@gmail.com>"]
+edition = "2021"
+
+[lib]
+
+[dependencies]
+hip_runtime-sys = { version = "0.0.0", path = "../hip_runtime-sys" }
diff --git a/ext/rocblas-sys/build.rs b/ext/rocblas-sys/build.rs
new file mode 100644
index 0000000..8480042
--- /dev/null
+++ b/ext/rocblas-sys/build.rs
@@ -0,0 +1,9 @@
+use std::env::VarError;
+
+fn main() -> Result<(), VarError> {
+    if !cfg!(windows) {
+        println!("cargo:rustc-link-lib=dylib=rocblas");
+        println!("cargo:rustc-link-search=native=/opt/rocm/lib/");
+    }
+    Ok(())
+}
diff --git a/ext/rocblas-sys/src/lib.rs b/ext/rocblas-sys/src/lib.rs
new file mode 100644
index 0000000..1a4c864
--- /dev/null
+++ b/ext/rocblas-sys/src/lib.rs
@@ -0,0 +1,31561 @@
+// Generated automatically by zluda_bindgen
+// DO NOT EDIT MANUALLY
+#![allow(warnings)]
+/// \brief Struct to represent a 16 bit Brain floating-point number.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_bfloat16 {
+    pub data: u16,
+}
+/// \brief Struct to represent an 8-bit floating-point number.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_f8 {
+    pub data: u8,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_bf8 {
+    pub data: u8,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct _rocblas_handle {
+    _unused: [u8; 0],
+}
+/** \brief rocblas_handle is a structure holding the rocblas library context.
+ It must be initialized using rocblas_create_handle(),
+ and the returned handle must be passed
+ to all subsequent library function calls.
+ It should be destroyed at the end using rocblas_destroy_handle().*/
+#[repr(transparent)]
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_handle(pub *mut _rocblas_handle);
+/// \brief Opaque base class for device memory allocation
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rocblas_device_malloc_base {
+    _unused: [u8; 0],
+}
+pub type rocblas_int = i32;
+/// \brief Stride between matrices or vectors in strided_batched functions
+pub type rocblas_stride = i64;
+/// \brief Single precision floating point type
+pub type rocblas_float = f32;
+/// \brief Double precision floating point type
+pub type rocblas_double = f64;
+/// \brief Structure definition for rocblas_half
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_half {
+    pub data: u16,
+}
+/// \brief Struct to represent a complex number with single precision real and imaginary parts.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct rocblas_float_complex {
+    pub x: f32,
+    pub y: f32,
+}
+/// \brief Struct to represent a complex number with double precision real and imaginary parts.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct rocblas_double_complex {
+    pub x: f64,
+    pub y: f64,
+}
+impl rocblas_operation_ {
+    ///< Operate with the matrix.
+    pub const rocblas_operation_none: rocblas_operation_ = rocblas_operation_(111);
+}
+impl rocblas_operation_ {
+    ///< Operate with the transpose of the matrix.
+    pub const rocblas_operation_transpose: rocblas_operation_ = rocblas_operation_(112);
+}
+impl rocblas_operation_ {
+    pub const rocblas_operation_conjugate_transpose: rocblas_operation_ = rocblas_operation_(
+        113,
+    );
+}
+#[repr(transparent)]
+/// \brief Used to specify whether the matrix is to be transposed or not.
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_operation_(pub ::core::ffi::c_uint);
+/// \brief Used to specify whether the matrix is to be transposed or not.
+pub use self::rocblas_operation_ as rocblas_operation;
+impl rocblas_fill_ {
+    ///< Upper triangle.
+    pub const rocblas_fill_upper: rocblas_fill_ = rocblas_fill_(121);
+}
+impl rocblas_fill_ {
+    ///< Lower triangle.
+    pub const rocblas_fill_lower: rocblas_fill_ = rocblas_fill_(122);
+}
+impl rocblas_fill_ {
+    pub const rocblas_fill_full: rocblas_fill_ = rocblas_fill_(123);
+}
+#[repr(transparent)]
+/** \brief Used by the Hermitian, symmetric and triangular matrix
+ routines to specify whether the upper, or lower triangle is being referenced.*/
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_fill_(pub ::core::ffi::c_uint);
+/** \brief Used by the Hermitian, symmetric and triangular matrix
+ routines to specify whether the upper, or lower triangle is being referenced.*/
+pub use self::rocblas_fill_ as rocblas_fill;
+impl rocblas_diagonal_ {
+    ///< Non-unit triangular.
+    pub const rocblas_diagonal_non_unit: rocblas_diagonal_ = rocblas_diagonal_(131);
+}
+impl rocblas_diagonal_ {
+    ///< Unit triangular.
+    pub const rocblas_diagonal_unit: rocblas_diagonal_ = rocblas_diagonal_(132);
+}
+#[repr(transparent)]
+/** \brief It is used by the triangular matrix routines to specify whether the
+ matrix is unit triangular.*/
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_diagonal_(pub ::core::ffi::c_uint);
+/** \brief It is used by the triangular matrix routines to specify whether the
+ matrix is unit triangular.*/
+pub use self::rocblas_diagonal_ as rocblas_diagonal;
+impl rocblas_side_ {
+    /**< Multiply general matrix by symmetric,
+Hermitian, or triangular matrix on the left.*/
+    pub const rocblas_side_left: rocblas_side_ = rocblas_side_(141);
+}
+impl rocblas_side_ {
+    /**< Multiply general matrix by symmetric,
+Hermitian, or triangular matrix on the right.*/
+    pub const rocblas_side_right: rocblas_side_ = rocblas_side_(142);
+}
+impl rocblas_side_ {
+    pub const rocblas_side_both: rocblas_side_ = rocblas_side_(143);
+}
+#[repr(transparent)]
+/// \brief Indicates the side matrix A is located relative to matrix B during multiplication.
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_side_(pub ::core::ffi::c_uint);
+/// \brief Indicates the side matrix A is located relative to matrix B during multiplication.
+pub use self::rocblas_side_ as rocblas_side;
+impl rocblas_datatype_ {
+    ///< 16-bit floating point, real
+    pub const rocblas_datatype_f16_r: rocblas_datatype_ = rocblas_datatype_(150);
+}
+impl rocblas_datatype_ {
+    ///< 32-bit floating point, real
+    pub const rocblas_datatype_f32_r: rocblas_datatype_ = rocblas_datatype_(151);
+}
+impl rocblas_datatype_ {
+    ///< 64-bit floating point, real
+    pub const rocblas_datatype_f64_r: rocblas_datatype_ = rocblas_datatype_(152);
+}
+impl rocblas_datatype_ {
+    ///< 16-bit floating point, complex
+    pub const rocblas_datatype_f16_c: rocblas_datatype_ = rocblas_datatype_(153);
+}
+impl rocblas_datatype_ {
+    ///< 32-bit floating point, complex
+    pub const rocblas_datatype_f32_c: rocblas_datatype_ = rocblas_datatype_(154);
+}
+impl rocblas_datatype_ {
+    ///< 64-bit floating point, complex
+    pub const rocblas_datatype_f64_c: rocblas_datatype_ = rocblas_datatype_(155);
+}
+impl rocblas_datatype_ {
+    ///<  8-bit signed integer, real
+    pub const rocblas_datatype_i8_r: rocblas_datatype_ = rocblas_datatype_(160);
+}
+impl rocblas_datatype_ {
+    ///<  8-bit unsigned integer, real
+    pub const rocblas_datatype_u8_r: rocblas_datatype_ = rocblas_datatype_(161);
+}
+impl rocblas_datatype_ {
+    ///< 32-bit signed integer, real
+    pub const rocblas_datatype_i32_r: rocblas_datatype_ = rocblas_datatype_(162);
+}
+impl rocblas_datatype_ {
+    ///< 32-bit unsigned integer, real
+    pub const rocblas_datatype_u32_r: rocblas_datatype_ = rocblas_datatype_(163);
+}
+impl rocblas_datatype_ {
+    ///<  8-bit signed integer, complex
+    pub const rocblas_datatype_i8_c: rocblas_datatype_ = rocblas_datatype_(164);
+}
+impl rocblas_datatype_ {
+    ///<  8-bit unsigned integer, complex
+    pub const rocblas_datatype_u8_c: rocblas_datatype_ = rocblas_datatype_(165);
+}
+impl rocblas_datatype_ {
+    ///< 32-bit signed integer, complex
+    pub const rocblas_datatype_i32_c: rocblas_datatype_ = rocblas_datatype_(166);
+}
+impl rocblas_datatype_ {
+    ///< 32-bit unsigned integer, complex
+    pub const rocblas_datatype_u32_c: rocblas_datatype_ = rocblas_datatype_(167);
+}
+impl rocblas_datatype_ {
+    ///< 16-bit bfloat, real
+    pub const rocblas_datatype_bf16_r: rocblas_datatype_ = rocblas_datatype_(168);
+}
+impl rocblas_datatype_ {
+    ///< 16-bit bfloat, complex
+    pub const rocblas_datatype_bf16_c: rocblas_datatype_ = rocblas_datatype_(169);
+}
+impl rocblas_datatype_ {
+    ///< 8 bit floating point, real
+    pub const rocblas_datatype_f8_r: rocblas_datatype_ = rocblas_datatype_(170);
+}
+impl rocblas_datatype_ {
+    ///< 8 bit bfloat, real
+    pub const rocblas_datatype_bf8_r: rocblas_datatype_ = rocblas_datatype_(171);
+}
+impl rocblas_datatype_ {
+    ///< Invalid datatype value, do not use
+    pub const rocblas_datatype_invalid: rocblas_datatype_ = rocblas_datatype_(255);
+}
+#[repr(transparent)]
+/// \brief Indicates the precision width of data stored in a blas type.
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_datatype_(pub ::core::ffi::c_uint);
+/// \brief Indicates the precision width of data stored in a blas type.
+pub use self::rocblas_datatype_ as rocblas_datatype;
+impl rocblas_computetype_ {
+    pub const rocblas_compute_type_f32: rocblas_computetype_ = rocblas_computetype_(300);
+}
+impl rocblas_computetype_ {
+    pub const rocblas_compute_type_f8_f8_f32: rocblas_computetype_ = rocblas_computetype_(
+        301,
+    );
+}
+impl rocblas_computetype_ {
+    pub const rocblas_compute_type_f8_bf8_f32: rocblas_computetype_ = rocblas_computetype_(
+        302,
+    );
+}
+impl rocblas_computetype_ {
+    pub const rocblas_compute_type_bf8_f8_f32: rocblas_computetype_ = rocblas_computetype_(
+        303,
+    );
+}
+impl rocblas_computetype_ {
+    pub const rocblas_compute_type_bf8_bf8_f32: rocblas_computetype_ = rocblas_computetype_(
+        304,
+    );
+}
+impl rocblas_computetype_ {
+    ///< Invalid compute type value, do not use
+    pub const rocblas_compute_type_invalid: rocblas_computetype_ = rocblas_computetype_(
+        455,
+    );
+}
+#[repr(transparent)]
+/// \brief Indicates the compute precision mode.
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_computetype_(pub ::core::ffi::c_uint);
+/// \brief Indicates the compute precision mode.
+pub use self::rocblas_computetype_ as rocblas_computetype;
+///   @brief rocblas status codes definition
+pub type rocblas_status_ = ::core::ffi::c_uint;
+impl rocblas_pointer_mode_ {
+    /// \brief Scalar values affected by this variable are located on the host.
+    pub const rocblas_pointer_mode_host: rocblas_pointer_mode_ = rocblas_pointer_mode_(
+        0,
+    );
+}
+impl rocblas_pointer_mode_ {
+    /// \brief Scalar values affected by this variable are located on the device.
+    pub const rocblas_pointer_mode_device: rocblas_pointer_mode_ = rocblas_pointer_mode_(
+        1,
+    );
+}
+#[repr(transparent)]
+/** \brief Indicates if scalar pointers are on host or device. This is used for
+    scalars alpha and beta and for scalar function return values.*/
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_pointer_mode_(pub ::core::ffi::c_uint);
+/** \brief Indicates if scalar pointers are on host or device. This is used for
+    scalars alpha and beta and for scalar function return values.*/
+pub use self::rocblas_pointer_mode_ as rocblas_pointer_mode;
+impl rocblas_atomics_mode_ {
+    /// \brief Algorithms will refrain from atomics where applicable
+    pub const rocblas_atomics_not_allowed: rocblas_atomics_mode_ = rocblas_atomics_mode_(
+        0,
+    );
+}
+impl rocblas_atomics_mode_ {
+    /// \brief Algorithms will take advantage of atomics where applicable
+    pub const rocblas_atomics_allowed: rocblas_atomics_mode_ = rocblas_atomics_mode_(1);
+}
+#[repr(transparent)]
+/** \brief Indicates if atomic operations are allowed. Not allowing atomic operations
+    may generally improve determinism and repeatability of results at a cost of performance.
+    Defaults to rocblas_atomics_allowed.*/
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_atomics_mode_(pub ::core::ffi::c_uint);
+/** \brief Indicates if atomic operations are allowed. Not allowing atomic operations
+    may generally improve determinism and repeatability of results at a cost of performance.
+    Defaults to rocblas_atomics_allowed.*/
+pub use self::rocblas_atomics_mode_ as rocblas_atomics_mode;
+impl rocblas_performance_metric_ {
+    /// \brief Use Tensile's default performance metric for solution selection
+    pub const rocblas_default_performance_metric: rocblas_performance_metric_ = rocblas_performance_metric_(
+        0,
+    );
+}
+impl rocblas_performance_metric_ {
+    /// \brief Select the solution with the highest GFlops across all compute units
+    pub const rocblas_device_efficiency_performance_metric: rocblas_performance_metric_ = rocblas_performance_metric_(
+        1,
+    );
+}
+impl rocblas_performance_metric_ {
+    /** \brief Select the solution with the highest GFlops per compute unit it uses. This
+ may be useful when running multiple small gemm problems simultaneously*/
+    pub const rocblas_cu_efficiency_performance_metric: rocblas_performance_metric_ = rocblas_performance_metric_(
+        2,
+    );
+}
+#[repr(transparent)]
+/** \brief Indicates which performance metric Tensile uses when selecting the optimal
+    solution for gemm problems.*/
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_performance_metric_(pub ::core::ffi::c_uint);
+/** \brief Indicates which performance metric Tensile uses when selecting the optimal
+    solution for gemm problems.*/
+pub use self::rocblas_performance_metric_ as rocblas_performance_metric;
+impl rocblas_layer_mode_ {
+    /// \brief No logging will take place.
+    pub const rocblas_layer_mode_none: rocblas_layer_mode_ = rocblas_layer_mode_(0);
+}
+impl rocblas_layer_mode_ {
+    /// \brief A line containing the function name and value of arguments passed will be printed with each rocBLAS function call.
+    pub const rocblas_layer_mode_log_trace: rocblas_layer_mode_ = rocblas_layer_mode_(1);
+}
+impl rocblas_layer_mode_ {
+    /// \brief Outputs a line each time a rocBLAS function is called, this line can be used with rocblas-bench to make the same call again.
+    pub const rocblas_layer_mode_log_bench: rocblas_layer_mode_ = rocblas_layer_mode_(2);
+}
+impl rocblas_layer_mode_ {
+    /// \brief Outputs a YAML description of each rocBLAS function called, along with its arguments and number of times it was called.
+    pub const rocblas_layer_mode_log_profile: rocblas_layer_mode_ = rocblas_layer_mode_(
+        4,
+    );
+}
+#[repr(transparent)]
+/// \brief Indicates if layer is active with bitmask
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_layer_mode_(pub ::core::ffi::c_uint);
+/// \brief Indicates if layer is active with bitmask
+pub use self::rocblas_layer_mode_ as rocblas_layer_mode;
+impl rocblas_gemm_algo_ {
+    pub const rocblas_gemm_algo_standard: rocblas_gemm_algo_ = rocblas_gemm_algo_(0);
+}
+impl rocblas_gemm_algo_ {
+    pub const rocblas_gemm_algo_solution_index: rocblas_gemm_algo_ = rocblas_gemm_algo_(
+        1,
+    );
+}
+#[repr(transparent)]
+/// \brief Indicates which gemm algorithm is selected (see the `rocblas_gemm_algo_*` constants)
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_gemm_algo_(pub ::core::ffi::c_uint);
+/// \brief Indicates which gemm algorithm is selected (see the `rocblas_gemm_algo_*` constants)
+pub use self::rocblas_gemm_algo_ as rocblas_gemm_algo;
+impl rocblas_geam_ex_operation_ {
+    pub const rocblas_geam_ex_operation_min_plus: rocblas_geam_ex_operation_ = rocblas_geam_ex_operation_(
+        0,
+    );
+}
+impl rocblas_geam_ex_operation_ {
+    pub const rocblas_geam_ex_operation_plus_min: rocblas_geam_ex_operation_ = rocblas_geam_ex_operation_(
+        1,
+    );
+}
+#[repr(transparent)]
+/// \brief Which mathematical geam-like operation to perform for geam_ex
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_geam_ex_operation_(pub ::core::ffi::c_uint);
+/// \brief Which mathematical geam-like operation to perform for geam_ex
+pub use self::rocblas_geam_ex_operation_ as rocblas_geam_ex_operation;
+impl rocblas_gemm_flags_ {
+    /// \brief Default empty flags
+    pub const rocblas_gemm_flags_none: rocblas_gemm_flags_ = rocblas_gemm_flags_(0);
+}
+impl rocblas_gemm_flags_ {
+    #[doc = " \\brief Select the gemm problem with the highest efficiency per compute unit used. Useful for running multiple smaller problems\n simultaneously. This takes precedence over the performance metric set in rocblas_handle and currently only works for\n gemm_*_ex problems.\n\n Note: before ROCm 6.0, rocblas_gemm_flags_pack_int8x4 = 0x1 existed; it has now been removed, so that value is available for future use."]
+    pub const rocblas_gemm_flags_use_cu_efficiency: rocblas_gemm_flags_ = rocblas_gemm_flags_(
+        2,
+    );
+}
+impl rocblas_gemm_flags_ {
+    /** \brief Select an alternate implementation for the MI200 FP16 HPA
+ (High Precision Accumulate) GEMM kernel utilizing the BF16 matrix
+ instructions with reduced accuracy in cases where computation cannot
+ tolerate the FP16 matrix instructions flushing subnormal FP16
+ input/output data to zero. See the "MI200 (gfx90a) Considerations"
+ section for more details.*/
+    pub const rocblas_gemm_flags_fp16_alt_impl: rocblas_gemm_flags_ = rocblas_gemm_flags_(
+        4,
+    );
+}
+impl rocblas_gemm_flags_ {
+    /** NOTE(review): the comment generated for this constant was a verbatim
+ copy of the `rocblas_gemm_flags_fp16_alt_impl` description (MI200 FP16 HPA
+ alternate GEMM kernel), which documents a different flag. No description
+ specific to this flag is available here; judging by the name it presumably
+ concerns checking a gemm solution index -- TODO confirm against the
+ rocBLAS documentation.*/
+    pub const rocblas_gemm_flags_check_solution_index: rocblas_gemm_flags_ = rocblas_gemm_flags_(
+        8,
+    );
+}
+impl rocblas_gemm_flags_ {
+    /** \brief Select an alternate implementation for the MI200 FP16 HPA
+ (High Precision Accumulate) GEMM kernel utilizing the BF16 matrix
+ instructions with reduced accuracy in cases where computation cannot
+ tolerate the FP16 matrix instructions flushing subnormal FP16
+ input/output data to zero. See the "MI200 (gfx90a) Considerations"
+ section for more details.*/
+    pub const rocblas_gemm_flags_fp16_alt_impl_rnz: rocblas_gemm_flags_ = rocblas_gemm_flags_(
+        16,
+    );
+}
+impl rocblas_gemm_flags_ {
+    /** \brief Select an alternate implementation for the MI200 FP16 HPA
+ (High Precision Accumulate) GEMM kernel utilizing the BF16 matrix
+ instructions with reduced accuracy in cases where computation cannot
+ tolerate the FP16 matrix instructions flushing subnormal FP16
+ input/output data to zero. See the "MI200 (gfx90a) Considerations"
+ section for more details.*/
+    pub const rocblas_gemm_flags_stochastic_rounding: rocblas_gemm_flags_ = rocblas_gemm_flags_(
+        32,
+    );
+}
+#[repr(transparent)]
+/// \brief Control flags passed into gemm algorithms invoked by Tensile Host
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_gemm_flags_(pub ::core::ffi::c_uint);
+/// \brief Control flags passed into gemm algorithms invoked by Tensile Host
+pub use self::rocblas_gemm_flags_ as rocblas_gemm_flags;
+/// \brief Union for representing scalar values; being a `#[repr(C)]` union, all members share the same storage.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union rocblas_union_u {
+    pub h: rocblas_half, // `rocblas_half` view of the scalar
+    pub s: f32, // 32-bit float view
+    pub d: f64, // 64-bit float view
+    pub i: i32, // 32-bit signed integer view
+    pub c: rocblas_float_complex, // `rocblas_float_complex` view
+    pub z: rocblas_double_complex, // `rocblas_double_complex` view
+}
+/// \brief Union for representing scalar values (type alias of `rocblas_union_u`)
+pub type rocblas_union_t = rocblas_union_u;
+impl rocblas_check_numerics_mode_ {
+    // The non-zero values are distinct bits (1, 2, 4, 8); 0 is "no check".
+    // NOTE(review): the per-constant notes below are read off the names
+    // only - confirm exact behaviour against the rocBLAS documentation.
+    /// Value 0; the name indicates that no numerics checking is requested.
+    pub const rocblas_check_numerics_mode_no_check: rocblas_check_numerics_mode_ = rocblas_check_numerics_mode_(
+        0,
+    );
+    /// Value 1; the "info" checking mode.
+    pub const rocblas_check_numerics_mode_info: rocblas_check_numerics_mode_ = rocblas_check_numerics_mode_(
+        1,
+    );
+    /// Value 2; the "warn" checking mode.
+    pub const rocblas_check_numerics_mode_warn: rocblas_check_numerics_mode_ = rocblas_check_numerics_mode_(
+        2,
+    );
+    /// Value 4; the "fail" checking mode.
+    pub const rocblas_check_numerics_mode_fail: rocblas_check_numerics_mode_ = rocblas_check_numerics_mode_(
+        4,
+    );
+    /// Value 8; the "only NaN/Inf" checking mode.
+    pub const rocblas_check_numerics_mode_only_nan_inf: rocblas_check_numerics_mode_ = rocblas_check_numerics_mode_(
+        8,
+    );
+}
+#[repr(transparent)]
+/// \brief Numerical checking for verifying the Input and Output vector/matrix of the rocBLAS functions for a NaN, zero, infinity and denormal value
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_check_numerics_mode_(pub ::core::ffi::c_uint);
+/// \brief Numerical checking for verifying the Input and Output vector/matrix of the rocBLAS functions for a NaN, zero, infinity and denormal value
+pub use self::rocblas_check_numerics_mode_ as rocblas_check_numerics_mode;
+impl rocblas_math_mode_ {
+    /// Value 0; by its name, the default math mode.
+    pub const rocblas_default_math: rocblas_math_mode_ = rocblas_math_mode_(0);
+    /// Value 1; NOTE(review): the name suggests an XF32 / XDL math-op mode - confirm against rocBLAS docs.
+    pub const rocblas_xf32_xdl_math_op: rocblas_math_mode_ = rocblas_math_mode_(1);
+}
+#[repr(transparent)]
+#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+pub struct rocblas_math_mode_(pub ::core::ffi::c_uint);
+pub use self::rocblas_math_mode_ as rocblas_math_mode;
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Create handle (takes a `*mut rocblas_handle`; the returned status must not be ignored)
+    pub fn rocblas_create_handle(handle: *mut rocblas_handle) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Destroy handle (takes the handle by value; the returned status must not be ignored)
+    pub fn rocblas_destroy_handle(handle: rocblas_handle) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Set stream for handle
+    pub fn rocblas_set_stream(
+        handle: rocblas_handle,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Get stream [0] from handle (stored through the `stream` pointer)
+    pub fn rocblas_get_stream(
+        handle: rocblas_handle,
+        stream: *mut hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Set rocblas_pointer_mode
+    pub fn rocblas_set_pointer_mode(
+        handle: rocblas_handle,
+        pointer_mode: rocblas_pointer_mode,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Get rocblas_pointer_mode (stored through the `pointer_mode` pointer)
+    pub fn rocblas_get_pointer_mode(
+        handle: rocblas_handle,
+        pointer_mode: *mut rocblas_pointer_mode,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Set rocblas_atomics_mode
+  \details
+  Some rocBLAS functions may have implementations which use atomic operations to increase performance.
+  By using atomic operations, results are not guaranteed to be identical between multiple runs.
+  Results will be accurate with or without atomic operations, but if it is required to
+  have bit-wise reproducible results, atomic operations should not be used.
+
+  Atomic operations can be turned on or off for a handle by calling rocblas_set_atomics_mode.
+  By default, this is set to `rocblas_atomics_allowed`.*/
+    pub fn rocblas_set_atomics_mode(
+        handle: rocblas_handle,
+        atomics_mode: rocblas_atomics_mode,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Get rocblas_atomics_mode (stored through the `atomics_mode` pointer)
+    pub fn rocblas_get_atomics_mode(
+        handle: rocblas_handle,
+        atomics_mode: *mut rocblas_atomics_mode,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Set rocblas_math_mode
+    pub fn rocblas_set_math_mode(
+        handle: rocblas_handle,
+        math_mode: rocblas_math_mode,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// \brief Get rocblas_math_mode (stored through the `math_mode` pointer)
+    pub fn rocblas_get_math_mode(
+        handle: rocblas_handle,
+        math_mode: *mut rocblas_math_mode,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    /// \brief Indicates whether the pointer is on the host or device (returns a `rocblas_pointer_mode`, not a status).
+    pub fn rocblas_pointer_to_mode(
+        ptr: *mut ::core::ffi::c_void,
+    ) -> rocblas_pointer_mode;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Copy vector from host to device
+@param[in]
+n           [rocblas_int]
+number of elements in the vector
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the vector
+@param[in]
+x           pointer to vector on the host
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of the vector
+@param[out]
+y           pointer to vector on the device
+@param[in]
+incy        [rocblas_int]
+specifies the increment for the elements of the vector*/
+    pub fn rocblas_set_vector(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_set_vector`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_set_vector_64(
+        n: i64,
+        elem_size: i64,
+        x: *const ::core::ffi::c_void,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Copy vector from device to host
+@param[in]
+n           [rocblas_int]
+number of elements in the vector
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the vector
+@param[in]
+x           pointer to vector on the device
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of the vector
+@param[out]
+y           pointer to vector on the host
+@param[in]
+incy        [rocblas_int]
+specifies the increment for the elements of the vector*/
+    pub fn rocblas_get_vector(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_get_vector`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_get_vector_64(
+        n: i64,
+        elem_size: i64,
+        x: *const ::core::ffi::c_void,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Copy matrix from host to device
+@param[in]
+rows        [rocblas_int]
+number of rows in matrices
+@param[in]
+cols        [rocblas_int]
+number of columns in matrices
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the matrix
+@param[in]
+a           pointer to matrix on the host
+@param[in]
+lda         [rocblas_int]
+specifies the leading dimension of A, lda >= rows
+@param[out]
+b           pointer to matrix on the GPU
+@param[in]
+ldb         [rocblas_int]
+specifies the leading dimension of B, ldb >= rows*/
+    pub fn rocblas_set_matrix(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        b: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_set_matrix`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_set_matrix_64(
+        rows: i64,
+        cols: i64,
+        elem_size: i64,
+        a: *const ::core::ffi::c_void,
+        lda: i64,
+        b: *mut ::core::ffi::c_void,
+        ldb: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Copy matrix from device to host
+@param[in]
+rows        [rocblas_int]
+number of rows in matrices
+@param[in]
+cols        [rocblas_int]
+number of columns in matrices
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the matrix
+@param[in]
+a           pointer to matrix on the GPU
+@param[in]
+lda         [rocblas_int]
+specifies the leading dimension of A, lda >= rows
+@param[out]
+b           pointer to matrix on the host
+@param[in]
+ldb         [rocblas_int]
+specifies the leading dimension of B, ldb >= rows*/
+    pub fn rocblas_get_matrix(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        b: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_get_matrix`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_get_matrix_64(
+        rows: i64,
+        cols: i64,
+        elem_size: i64,
+        a: *const ::core::ffi::c_void,
+        lda: i64,
+        b: *mut ::core::ffi::c_void,
+        ldb: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Asynchronously copy vector from host to device
+\details
+rocblas_set_vector_async copies a vector from pinned host memory to device memory asynchronously.
+Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
+@param[in]
+n           [rocblas_int]
+number of elements in the vector
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the vector
+@param[in]
+x           pointer to vector on the host
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of the vector
+@param[out]
+y           pointer to vector on the device
+@param[in]
+incy        [rocblas_int]
+specifies the increment for the elements of the vector
+@param[in]
+stream      specifies the stream into which this transfer request is queued*/
+    pub fn rocblas_set_vector_async(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        incy: rocblas_int,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_set_vector_async`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_set_vector_async_64(
+        n: i64,
+        elem_size: i64,
+        x: *const ::core::ffi::c_void,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        incy: i64,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Asynchronously copy vector from device to host
+\details
+rocblas_get_vector_async copies a vector from device memory to pinned host memory asynchronously.
+Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
+@param[in]
+n           [rocblas_int]
+number of elements in the vector
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the vector
+@param[in]
+x           pointer to vector on the device
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of the vector
+@param[out]
+y           pointer to vector on the host
+@param[in]
+incy        [rocblas_int]
+specifies the increment for the elements of the vector
+@param[in]
+stream      specifies the stream into which this transfer request is queued*/
+    pub fn rocblas_get_vector_async(
+        n: rocblas_int,
+        elem_size: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        incy: rocblas_int,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_get_vector_async`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_get_vector_async_64(
+        n: i64,
+        elem_size: i64,
+        x: *const ::core::ffi::c_void,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        incy: i64,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Asynchronously copy matrix from host to device
+\details
+rocblas_set_matrix_async copies a matrix from pinned host memory to device memory asynchronously.
+Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
+@param[in]
+rows        [rocblas_int]
+number of rows in matrices
+@param[in]
+cols        [rocblas_int]
+number of columns in matrices
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the matrix
+@param[in]
+a           pointer to matrix on the host
+@param[in]
+lda         [rocblas_int]
+specifies the leading dimension of A, lda >= rows
+@param[out]
+b           pointer to matrix on the GPU
+@param[in]
+ldb         [rocblas_int]
+specifies the leading dimension of B, ldb >= rows
+@param[in]
+stream      specifies the stream into which this transfer request is queued*/
+    pub fn rocblas_set_matrix_async(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        b: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_set_matrix_async`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_set_matrix_async_64(
+        rows: i64,
+        cols: i64,
+        elem_size: i64,
+        a: *const ::core::ffi::c_void,
+        lda: i64,
+        b: *mut ::core::ffi::c_void,
+        ldb: i64,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief Asynchronously copy matrix from device to host
+\details
+rocblas_get_matrix_async copies a matrix from device memory to pinned host memory asynchronously.
+Memory on the host must be allocated with hipHostMalloc or the transfer will be synchronous.
+@param[in]
+rows        [rocblas_int]
+number of rows in matrices
+@param[in]
+cols        [rocblas_int]
+number of columns in matrices
+@param[in]
+elem_size   [rocblas_int]
+number of bytes per element in the matrix
+@param[in]
+a           pointer to matrix on the GPU
+@param[in]
+lda         [rocblas_int]
+specifies the leading dimension of A, lda >= rows
+@param[out]
+b           pointer to matrix on the host
+@param[in]
+ldb         [rocblas_int]
+specifies the leading dimension of B, ldb >= rows
+@param[in]
+stream      specifies the stream into which this transfer request is queued*/
+    pub fn rocblas_get_matrix_async(
+        rows: rocblas_int,
+        cols: rocblas_int,
+        elem_size: rocblas_int,
+        a: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        b: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_get_matrix_async`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_get_matrix_async_64(
+        rows: i64,
+        cols: i64,
+        elem_size: i64,
+        a: *const ::core::ffi::c_void,
+        lda: i64,
+        b: *mut ::core::ffi::c_void,
+        ldb: i64,
+        stream: hip_runtime_sys::hipStream_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /// Function to set start/stop event handlers (for internal use only)
+    pub fn rocblas_set_start_stop_events(
+        handle: rocblas_handle,
+        startEvent: hip_runtime_sys::hipEvent_t,
+        stopEvent: hip_runtime_sys::hipEvent_t,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // NOTE(review): undocumented in the header; `fitness` is presumably an out-pointer - confirm upstream.
+    pub fn rocblas_set_solution_fitness_query(
+        handle: rocblas_handle,
+        fitness: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief specifies the performance metric that solution selection uses
+\details
+Determines which performance metric will be used by Tensile when selecting the optimal solution
+for gemm problems. If a valid solution benchmarked for this performance metric does not exist
+for a problem, Tensile will default to a solution benchmarked for overall performance instead.
+@param[in]
+handle      [rocblas_handle]
+the handle of device
+@param[in]
+metric      [rocblas_performance_metric]
+the performance metric to be used*/
+    pub fn rocblas_set_performance_metric(
+        handle: rocblas_handle,
+        metric: rocblas_performance_metric,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief returns the performance metric being used for solution selection
+\details
+Returns the performance metric used by Tensile to select the optimal solution for gemm problems.
+@param[in]
+handle      [rocblas_handle]
+the handle of device
+@param[out]
+metric      [rocblas_performance_metric*]
+pointer to where the metric will be stored*/
+    pub fn rocblas_get_performance_metric(
+        handle: rocblas_handle,
+        metric: *mut rocblas_performance_metric,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 1 API </b>
+
+\details
+scal  scales each element of vector x with scalar alpha:
+
+x := alpha * x
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+alpha     device pointer or host pointer for the scalar alpha.
+@param[in, out]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+*/
+    pub fn rocblas_sscal(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // `f64` counterpart of `rocblas_sscal` (same parameter list).
+    pub fn rocblas_dscal(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // `rocblas_float_complex` counterpart of `rocblas_sscal`.
+    pub fn rocblas_cscal(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // `rocblas_double_complex` counterpart of `rocblas_sscal`.
+    pub fn rocblas_zscal(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mixed variant: `rocblas_float_complex` vector x with an `f32` alpha.
+    pub fn rocblas_csscal(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mixed variant: `rocblas_double_complex` vector x with an `f64` alpha.
+    pub fn rocblas_zdscal(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_sscal`, with `i64` for `n` and `incx`.
+    pub fn rocblas_sscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_dscal`, with `i64` for `n` and `incx`.
+    pub fn rocblas_dscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_cscal`, with `i64` for `n` and `incx`.
+    pub fn rocblas_cscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_zscal`, with `i64` for `n` and `incx`.
+    pub fn rocblas_zscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_csscal`, with `i64` for `n` and `incx`.
+    pub fn rocblas_csscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_zdscal`, with `i64` for `n` and `incx`.
+    pub fn rocblas_zdscal_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 1 API </b>
+
+\details
+scal_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:
+
+x_i := alpha * x_i,
+where (x_i) is the i-th instance of the batch.
+
+@param[in]
+handle      [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n           [rocblas_int]
+the number of elements in each x_i.
+@param[in]
+alpha       host pointer or device pointer for the scalar alpha.
+@param[in, out]
+x           device array of device pointers storing each vector x_i.
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+batch_count [rocblas_int]
+specifies the number of batches in x.
+*/
+    pub fn rocblas_sscal_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // `f64` counterpart of `rocblas_sscal_batched` (same parameter list).
+    pub fn rocblas_dscal_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // `rocblas_float_complex` counterpart of `rocblas_sscal_batched`.
+    pub fn rocblas_cscal_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // `rocblas_double_complex` counterpart of `rocblas_sscal_batched`.
+    pub fn rocblas_zscal_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mixed variant: `rocblas_float_complex` vectors x_i with an `f32` alpha.
+    pub fn rocblas_csscal_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mixed variant: `rocblas_double_complex` vectors x_i with an `f64` alpha.
+    pub fn rocblas_zdscal_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_sscal_batched`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_sscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_dscal_batched`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_dscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_cscal_batched`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_cscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_zscal_batched`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_zscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_csscal_batched`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_csscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Same parameter list as `rocblas_zdscal_batched`, with `i64` in place of `rocblas_int`.
+    pub fn rocblas_zdscal_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+scal_strided_batched  scales each element of vector x_i with scalar alpha, for i = 1, ... , batch_count:
+
+x_i := alpha * x_i,
+where (x_i) is the i-th instance of the batch.
+
+@param[in]
+handle      [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n           [rocblas_int]
+the number of elements in each x_i.
+@param[in]
+alpha       host pointer or device pointer for the scalar alpha.
+@param[in, out]
+x           device pointer to the first vector (x_1) in the batch.
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+stride_x    [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+There are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size, for a typical
+case this means stride_x >= n * incx.
+@param[in]
+batch_count [rocblas_int]
+specifies the number of batches in x.
+*/
+    pub fn rocblas_sscal_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdscal_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+copy  copies each element x[i] into y[i], for  i = 1 , ... , n:
+
+y := x
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x to be copied to y.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_scopy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+copy_batched copies each element x_i[j] into y_i[j], for  j = 1 , ... , n; i = 1 , ... , batch_count:
+
+y_i := x_i,
+where (x_i, y_i) is the i-th instance of the batch.
+x_i and y_i are vectors.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i to be copied to y_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each vector x_i.
+@param[out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each vector y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_scopy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+copy_strided_batched copies each element x_i[j] into y_i[j], for  j = 1 , ... , n; i = 1 , ... , batch_count:
+
+y_i := x_i,
+where (x_i, y_i) is the i-th instance of the batch.
+x_i and y_i are vectors.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i to be copied to y_i.
+@param[in]
+x         device pointer to the first vector (x_1) in the batch.
+@param[in]
+incx      [rocblas_int]
+specifies the increments for the elements of vectors x_i.
+@param[in]
+stridex     [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stridex. However, the user should
+take care to ensure that stridex is of appropriate size. For a typical
+case, this means stridex >= n * incx.
+@param[out]
+y         device pointer to the first vector (y_1) in the batch.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of vectors y_i.
+@param[in]
+stridey     [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. For a typical
+case, this means stridey >= n * incy. stridey should be non zero.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_scopy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_scopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dcopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ccopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zcopy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+dot(u)  performs the dot product of vectors x and y:
+
+result = x * y;
+
+dotc  performs the dot product of the conjugate of complex vector x and complex vector y.
+
+result = conjugate (x) * y;
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x and y.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in, out]
+result
+device pointer or host pointer to store the dot product.
+return is 0.0 if n <= 0.
+*/
+    pub fn rocblas_sdot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *const f64,
+        incy: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        y: *const rocblas_half,
+        incy: rocblas_int,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        y: *const rocblas_bfloat16,
+        incy: rocblas_int,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sdot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        y: *const f32,
+        incy: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        y: *const f64,
+        incy: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_half,
+        incx: i64,
+        y: *const rocblas_half,
+        incy: i64,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_bfloat16,
+        incx: i64,
+        y: *const rocblas_bfloat16,
+        incy: i64,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+dot_batched(u) performs a batch of dot products of vectors x and y:
+
+result_i = x_i * y_i;
+
+dotc_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y
+
+result_i = conjugate (x_i) * y_i;
+where (x_i, y_i) is the i-th instance of the batch.
+x_i and y_i are vectors, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in, out]
+result
+device array or host array of batch_count size to store the dot products of each batch.
+return 0.0 for each element if n <= 0.
+*/
+    pub fn rocblas_sdot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *const f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *const f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        y: *const *const rocblas_half,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_bfloat16,
+        incx: rocblas_int,
+        y: *const *const rocblas_bfloat16,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sdot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *const f32,
+        incy: i64,
+        batch_count: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *const f64,
+        incy: i64,
+        batch_count: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_half,
+        incx: i64,
+        y: *const *const rocblas_half,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_bfloat16,
+        incx: i64,
+        y: *const *const rocblas_bfloat16,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+dot_strided_batched(u)  performs a batch of dot products of vectors x and y:
+
+result_i = x_i * y_i;
+
+dotc_strided_batched  performs a batch of dot products of the conjugate of complex vector x and complex vector y
+
+result_i = conjugate (x_i) * y_i;
+where (x_i, y_i) is the i-th instance of the batch.
+x_i and y_i are vectors, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in]
+x         device pointer to the first vector (x_1) in the batch.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stridex     [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+@param[in]
+y         device pointer to the first vector (y_1) in the batch.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stridey     [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in, out]
+result
+device array or host array of batch_count size to store the dot products of each batch.
+return 0.0 for each element if n <= 0.
+*/
+    pub fn rocblas_sdot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_half,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_bfloat16,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sdot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ddot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hdot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_half,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_half,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_half,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_bfdot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_bfloat16,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_bfloat16,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_bfloat16,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotu_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotu_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cdotc_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdotc_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        result: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+swap  interchanges vectors x and y:
+
+y := x;
+x := y
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x and y.
+@param[in, out]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_sswap(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+swap_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:
+
+y_i := x_i;
+x_i := y_i
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in, out]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_sswap_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+swap_strided_batched interchanges vectors x_i and y_i, for i = 1 , ... , batch_count:
+
+y_i := x_i;
+x_i := y_i
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in, out]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+stridex   [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical
+case this means stride_x >= n * incx.
+@param[in, out]
+y         device pointer to the first vector y_1.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in]
+stridey   [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+There are no restrictions placed on stride_y. However, ensure that stride_y is of appropriate size. For a typical
+case this means stride_y >= n * incy. stridey should be non zero.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_sswap_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zswap_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+axpy   computes constant alpha multiplied by vector x, plus vector y:
+
+y := alpha * x + y
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x and y.
+@param[in]
+alpha     device pointer or host pointer to specify the scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_haxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_haxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: i64,
+        y: *mut rocblas_half,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+axpy_batched computes y := alpha * x + y over a set of batched vectors.
+
+@param[in]
+handle    rocblas_handle
+handle to the rocblas library context queue.
+@param[in]
+n         rocblas_int
+@param[in]
+alpha     specifies the scalar alpha.
+@param[in]
+x         array of pointers to the vectors x_i on the GPU.
+@param[in]
+incx      rocblas_int
+specifies the increment for the elements of x.
+@param[in, out]
+y         array of pointers to the vectors y_i on the GPU.
+@param[in]
+incy      rocblas_int
+specifies the increment for the elements of y.
+
+@param[in]
+batch_count rocblas_int
+number of instances in the batch.
+*/
+    pub fn rocblas_haxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        y: *const *mut rocblas_half,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_haxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_half,
+        x: *const *const rocblas_half,
+        incx: i64,
+        y: *const *mut rocblas_half,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+axpy_strided_batched computes y := alpha * x + y over a set of strided batched vectors.
+
+@param[in]
+handle    rocblas_handle
+handle to the rocblas library context queue.
+@param[in]
+n         rocblas_int.
+@param[in]
+alpha     specifies the scalar alpha.
+@param[in]
+x         pointer to the first vector x_1 on the GPU.
+@param[in]
+incx      rocblas_int
+specifies the increment for the elements of x.
+@param[in]
+stridex   rocblas_stride
+specifies the increment between vectors of x.
+@param[in, out]
+y         pointer to the first vector y_1 on the GPU.
+@param[in]
+incy      rocblas_int
+specifies the increment for the elements of y.
+@param[in]
+stridey   rocblas_stride
+specifies the increment between vectors of y.
+
+@param[in]
+batch_count rocblas_int
+number of instances in the batch.
+*/
+    pub fn rocblas_haxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_haxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_half,
+        x: *const rocblas_half,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_half,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_saxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_daxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_caxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zaxpy_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+asum computes the sum of the magnitudes of elements of a real vector x,
+or the sum of magnitudes of the real and imaginary parts of elements if x is a complex vector.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x. incx must be > 0.
+@param[in, out]
+result
+device pointer or host pointer to store the asum result.
+return is 0.0 if n <= 0.
+*/
+    pub fn rocblas_sasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum signature with f64 x and result.
+    #[must_use]
+    pub fn rocblas_dasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum signature with rocblas_float_complex x; result stays f32.
+    #[must_use]
+    pub fn rocblas_scasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum signature with rocblas_double_complex x and f64 result.
+    #[must_use]
+    pub fn rocblas_dzasum(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_sasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dasum signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_scasum signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_scasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dzasum signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dzasum_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+asum_batched computes the sum of the magnitudes of the elements in a batch of real vectors x_i,
+or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex
+vector, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each vector x_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+results
+device array or host array of batch_count size for results.
+return is 0.0 if n, incx<=0.
+*/
+    pub fn rocblas_sasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_batched signature with f64 x and results.
+    #[must_use]
+    pub fn rocblas_dasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_batched signature with rocblas_float_complex x; results stays f32.
+    #[must_use]
+    pub fn rocblas_scasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_batched signature with rocblas_double_complex x and f64 results.
+    #[must_use]
+    pub fn rocblas_dzasum_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_sasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dasum_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_scasum_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_scasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dzasum_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dzasum_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+asum_strided_batched computes the sum of the magnitudes of the elements in a batch of real vectors x_i,
+or the sum of magnitudes of the real and imaginary parts of elements if x_i is a complex
+vector, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each vector x_i.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+stridex   [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical
+case this means stridex >= n * incx.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+results
+device pointer or host pointer to array for storing contiguous batch_count results.
+return is 0.0 if n, incx<=0.
+*/
+    pub fn rocblas_sasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_strided_batched signature with f64 x and results.
+    #[must_use]
+    pub fn rocblas_dasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_strided_batched signature with rocblas_float_complex x; results stays f32.
+    #[must_use]
+    pub fn rocblas_scasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_strided_batched signature with rocblas_double_complex x, f64 results.
+    #[must_use]
+    pub fn rocblas_dzasum_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_sasum_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_sasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dasum_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_scasum_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_scasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dzasum_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dzasum_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+nrm2 computes the euclidean norm of a real or complex vector:
+
+result := sqrt( x'*x ) for real vectors
+result := sqrt( x**H*x ) for complex vectors
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+result
+device pointer or host pointer to store the nrm2 result.
+return is 0.0 if n, incx<=0.
+*/
+    pub fn rocblas_snrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2 signature with f64 x and result.
+    #[must_use]
+    pub fn rocblas_dnrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2 signature with rocblas_float_complex x; result stays f32.
+    #[must_use]
+    pub fn rocblas_scnrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2 signature with rocblas_double_complex x and f64 result.
+    #[must_use]
+    pub fn rocblas_dznrm2(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2 signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_snrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dnrm2 signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dnrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_scnrm2 signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_scnrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dznrm2 signature with i64 n and incx instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dznrm2_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+nrm2_batched computes the euclidean norm over a batch of real or complex vectors:
+
+result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count
+result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each x_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+results
+device pointer or host pointer to array of batch_count size for nrm2 results.
+return is 0.0 for each element if n <= 0, incx<=0.
+*/
+    pub fn rocblas_snrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_batched signature with f64 x and results.
+    #[must_use]
+    pub fn rocblas_dnrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_batched signature with rocblas_float_complex x; results stays f32.
+    #[must_use]
+    pub fn rocblas_scnrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_batched signature with rocblas_double_complex x and f64 results.
+    #[must_use]
+    pub fn rocblas_dznrm2_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_snrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dnrm2_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dnrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_scnrm2_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_scnrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dznrm2_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dznrm2_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+nrm2_strided_batched computes the euclidean norm over a batch of real or complex vectors:
+
+result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count
+result := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each x_i.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+stridex   [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical
+case this means stridex >= n * incx.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+results
+device pointer or host pointer to array for storing contiguous batch_count results.
+return is 0.0 for each element if n <= 0, incx<=0.
+*/
+    pub fn rocblas_snrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_strided_batched signature with f64 x and results.
+    #[must_use]
+    pub fn rocblas_dnrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_strided_batched signature with rocblas_float_complex x; results stays f32.
+    #[must_use]
+    pub fn rocblas_scnrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_strided_batched signature with rocblas_double_complex x, f64 results.
+    #[must_use]
+    pub fn rocblas_dznrm2_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_snrm2_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_snrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dnrm2_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dnrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_scnrm2_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_scnrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_dznrm2_strided_batched signature with i64 n, incx and batch_count.
+    #[must_use]
+    pub fn rocblas_dznrm2_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        results: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+amax finds the first index of the element of maximum magnitude of a vector x.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+result
+device pointer or host pointer to store the amax index.
+return is 0 if n, incx<=0.
+*/
+    pub fn rocblas_isamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax signature with f64 x; result stays *mut rocblas_int.
+    #[must_use]
+    pub fn rocblas_idamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax signature with rocblas_float_complex x; result stays *mut rocblas_int.
+    #[must_use]
+    pub fn rocblas_icamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax signature with rocblas_double_complex x; result stays *mut rocblas_int.
+    #[must_use]
+    pub fn rocblas_izamax(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax signature with i64 n and incx, and result as *mut i64.
+    #[must_use]
+    pub fn rocblas_isamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_idamax signature with i64 n and incx, and result as *mut i64.
+    #[must_use]
+    pub fn rocblas_idamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_icamax signature with i64 n and incx, and result as *mut i64.
+    #[must_use]
+    pub fn rocblas_icamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_izamax signature with i64 n and incx, and result as *mut i64.
+    #[must_use]
+    pub fn rocblas_izamax_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+amax_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each vector x_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch. Must be > 0.
+@param[out]
+result
+device or host array of batch_count size for results.
+return is 0 if n, incx<=0.
+*/
+    pub fn rocblas_isamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_batched signature with f64 x; result stays *mut rocblas_int.
+    #[must_use]
+    pub fn rocblas_idamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_batched signature with rocblas_float_complex x.
+    #[must_use]
+    pub fn rocblas_icamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_batched signature with rocblas_double_complex x.
+    #[must_use]
+    pub fn rocblas_izamax_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_isamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_idamax_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_idamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_icamax_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_icamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_izamax_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_izamax_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+amax_strided_batched finds the first index of the element of maximum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each vector x_i.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+stridex   [rocblas_stride]
+specifies the pointer increment between one x_i and the next x_(i + 1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+result
+device or host pointer for storing contiguous batch_count results.
+return is 0 if n <= 0, incx<=0.
+*/
+    pub fn rocblas_isamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_strided_batched signature with f64 x.
+    #[must_use]
+    pub fn rocblas_idamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_strided_batched signature with rocblas_float_complex x.
+    #[must_use]
+    pub fn rocblas_icamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_strided_batched signature with rocblas_double_complex x.
+    #[must_use]
+    pub fn rocblas_izamax_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamax_strided_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_isamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_idamax_strided_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_idamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_icamax_strided_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_icamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_izamax_strided_batched signature with i64 n, incx, batch_count; result is *mut i64.
+    #[must_use]
+    pub fn rocblas_izamax_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+amin finds the first index of the element of minimum magnitude of a vector x.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+result
+device pointer or host pointer to store the amin index.
+return is 0 if n, incx<=0.
+*/
+    pub fn rocblas_isamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_isamin signature with f64 x; result stays *mut rocblas_int.
+    #[must_use]
+    pub fn rocblas_idamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_isamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+amin_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each vector x_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch. Must be > 0.
+@param[out]
+result
+device or host pointers to array of batch_count size for results.
+return is 0 if n, incx<=0.
+*/
+    pub fn rocblas_isamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_isamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f32,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const f64,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+amin_strided_batched finds the first index of the element of minimum magnitude of each vector x_i in a batch, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each vector x_i.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+stridex   [rocblas_stride]
+specifies the pointer increment between one x_i and the next x_(i + 1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+result
+device or host pointer to array for storing contiguous batch_count results.
+return is 0 if n <= 0, incx<=0.
+*/
+    pub fn rocblas_isamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_isamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_idamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_icamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_izamin_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        result: *mut i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rot applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.
+Scalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in the x and y vectors.
+@param[in, out]
+x       device pointer storing vector x.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of x.
+@param[in, out]
+y       device pointer storing vector y.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of y.
+@param[in]
+c       device pointer or host pointer storing scalar cosine component of the rotation matrix.
+@param[in]
+s       device pointer or host pointer storing scalar sine component of the rotation matrix.
+*/
+    pub fn rocblas_srot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rot_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.
+Scalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in each x_i and y_i vectors.
+@param[in, out]
+x       device array of device pointers storing each vector x_i.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of each x_i.
+@param[in, out]
+y       device array of device pointers storing each vector y_i.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of each y_i.
+@param[in]
+c       device pointer or host pointer to scalar cosine component of the rotation matrix.
+@param[in]
+s       device pointer or host pointer to scalar sine component of the rotation matrix.
+@param[in]
+batch_count [rocblas_int]
+the number of x and y arrays, i.e. the number of batches.
+*/
+    pub fn rocblas_srot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        c: *const f32,
+        s: *const f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        c: *const f64,
+        s: *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rot_strided_batched applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.
+Scalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in each x_i and y_i vectors.
+@param[in, out]
+x       device pointer to the first vector x_1.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of each x_i.
+@param[in]
+stride_x [rocblas_stride]
+specifies the increment from the beginning of x_i to the beginning of x_(i+1).
+@param[in, out]
+y       device pointer to the first vector y_1.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of each y_i.
+@param[in]
+stride_y [rocblas_stride]
+specifies the increment from the beginning of y_i to the beginning of y_(i+1).
+@param[in]
+c       device pointer or host pointer to scalar cosine component of the rotation matrix.
+@param[in]
+s       device pointer or host pointer to scalar sine component of the rotation matrix.
+@param[in]
+batch_count [rocblas_int]
+the number of x and y arrays, i.e. the number of batches.
+*/
+    pub fn rocblas_srot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csrot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f32,
+        s: *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zdrot_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const f64,
+        s: *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotg creates the Givens rotation matrix for the vector (a b).
+Scalars a, b, c, and s may be stored in either host or device memory, location is specified by
+calling rocblas_set_pointer_mode. The computation uses the formulas
+
+sigma = sgn(a)    if |a| >  |b|
+sigma = sgn(b)    if |b| >= |a|
+r = sigma*sqrt( a**2 + b**2 )
+c = 1; s = 0      if r = 0
+c = a/r; s = b/r  if r != 0
+
+The subroutine also computes
+
+z = s    if |a| > |b|,
+z = 1/c  if |b| >= |a| and c != 0
+z = 1    if c = 0
+
+This allows c and s to be reconstructed from z as follows:
+
+If z = 1, set c = 0, s = 1.
+If |z| < 1, set c = sqrt(1 - z**2) and s = z.
+If |z| > 1, set c = 1/z and s = sqrt( 1 - c**2).
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in, out]
+a       pointer to a, an element in vector (a,b), overwritten with r.
+@param[in, out]
+b       pointer to b, an element in vector (a,b), overwritten with z.
+@param[out]
+c       pointer to c, cosine element of Givens rotation.
+@param[out]
+s       pointer to s, sine element of Givens rotation.
+*/
+    pub fn rocblas_srotg(
+        handle: rocblas_handle,
+        a: *mut f32,
+        b: *mut f32,
+        c: *mut f32,
+        s: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg(
+        handle: rocblas_handle,
+        a: *mut f64,
+        b: *mut f64,
+        c: *mut f64,
+        s: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg(
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        b: *mut rocblas_float_complex,
+        c: *mut f32,
+        s: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg(
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        b: *mut rocblas_double_complex,
+        c: *mut f64,
+        s: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotg_64(
+        handle: rocblas_handle,
+        a: *mut f32,
+        b: *mut f32,
+        c: *mut f32,
+        s: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_64(
+        handle: rocblas_handle,
+        a: *mut f64,
+        b: *mut f64,
+        c: *mut f64,
+        s: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        b: *mut rocblas_float_complex,
+        c: *mut f32,
+        s: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        b: *mut rocblas_double_complex,
+        c: *mut f64,
+        s: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotg_batched creates the Givens rotation matrix for the batched vectors (a_i b_i), for i = 1, ..., batch_count.
+a, b, c, and s are host pointers to an array of device pointers on the device, where each device pointer points
+to a scalar value of a_i, b_i, c_i, or s_i.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in, out]
+a       a, overwritten with r.
+@param[in, out]
+b       b overwritten with z.
+@param[out]
+c       cosine element of Givens rotation for the batch.
+@param[out]
+s       sine element of Givens rotation for the batch.
+@param[in]
+batch_count [rocblas_int]
+number of batches (length of arrays a, b, c, and s).
+*/
+    pub fn rocblas_srotg_batched(
+        handle: rocblas_handle,
+        a: *const *mut f32,
+        b: *const *mut f32,
+        c: *const *mut f32,
+        s: *const *mut f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_batched(
+        handle: rocblas_handle,
+        a: *const *mut f64,
+        b: *const *mut f64,
+        c: *const *mut f64,
+        s: *const *mut f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_batched(
+        handle: rocblas_handle,
+        a: *const *mut rocblas_float_complex,
+        b: *const *mut rocblas_float_complex,
+        c: *const *mut f32,
+        s: *const *mut rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_batched(
+        handle: rocblas_handle,
+        a: *const *mut rocblas_double_complex,
+        b: *const *mut rocblas_double_complex,
+        c: *const *mut f64,
+        s: *const *mut rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut f32,
+        b: *const *mut f32,
+        c: *const *mut f32,
+        s: *const *mut f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut f64,
+        b: *const *mut f64,
+        c: *const *mut f64,
+        s: *const *mut f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut rocblas_float_complex,
+        b: *const *mut rocblas_float_complex,
+        c: *const *mut f32,
+        s: *const *mut rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_batched_64(
+        handle: rocblas_handle,
+        a: *const *mut rocblas_double_complex,
+        b: *const *mut rocblas_double_complex,
+        c: *const *mut f64,
+        s: *const *mut rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotg_strided_batched creates the Givens rotation matrix for the strided batched vectors (a_i b_i), for i = 1, ..., batch_count.
+a, b, c, and s are host pointers to arrays a, b, c, s on the device.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in, out]
+a       host pointer to first single input vector element a_1 on the device, overwritten with r.
+@param[in]
+stride_a [rocblas_stride]
+distance between elements of a in batch (distance between a_i and a_(i + 1)).
+@param[in, out]
+b       host pointer to first single input vector element b_1 on the device, overwritten with z.
+@param[in]
+stride_b [rocblas_stride]
+distance between elements of b in batch (distance between b_i and b_(i + 1)).
+@param[out]
+c       host pointer to first single cosine element of Givens rotations c_1 on the device.
+@param[in]
+stride_c [rocblas_stride]
+distance between elements of c in batch (distance between c_i and c_(i + 1)).
+@param[out]
+s       host pointer to first single sine element of Givens rotations s_1 on the device.
+@param[in]
+stride_s [rocblas_stride]
+distance between elements of s in batch (distance between s_i and s_(i + 1)).
+@param[in]
+batch_count [rocblas_int]
+number of batches (length of arrays a, b, c, and s).
+*/
+    pub fn rocblas_srotg_strided_batched(
+        handle: rocblas_handle,
+        a: *mut f32,
+        stride_a: rocblas_stride,
+        b: *mut f32,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut f32,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_strided_batched(
+        handle: rocblas_handle,
+        a: *mut f64,
+        stride_a: rocblas_stride,
+        b: *mut f64,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut f64,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_strided_batched(
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_float_complex,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut rocblas_float_complex,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_strided_batched(
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_double_complex,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut rocblas_double_complex,
+        stride_s: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut f32,
+        stride_a: rocblas_stride,
+        b: *mut f32,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut f32,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut f64,
+        stride_a: rocblas_stride,
+        b: *mut f64,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut f64,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_crotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_float_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_float_complex,
+        stride_b: rocblas_stride,
+        c: *mut f32,
+        stride_c: rocblas_stride,
+        s: *mut rocblas_float_complex,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zrotg_strided_batched_64(
+        handle: rocblas_handle,
+        a: *mut rocblas_double_complex,
+        stride_a: rocblas_stride,
+        b: *mut rocblas_double_complex,
+        stride_b: rocblas_stride,
+        c: *mut f64,
+        stride_c: rocblas_stride,
+        s: *mut rocblas_double_complex,
+        stride_s: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotm applies the modified Givens rotation matrix defined by param to vectors x and y.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in the x and y vectors.
+@param[in, out]
+x       device pointer storing vector x.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of x.
+@param[in, out]
+y       device pointer storing vector y.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of y.
+@param[in]
+param   device vector or host vector of 5 elements defining the rotation.
+
+param[0] = flag
+param[1] = H11
+param[2] = H21
+param[3] = H12
+param[4] = H22
+
+The flag parameter defines the form of H:
+
+flag = -1 => H = ( H11 H12 H21 H22 )
+flag =  0 => H = ( 1.0 H12 H21 1.0 )
+flag =  1 => H = ( H11 1.0 -1.0 H22 )
+flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
+
+param may be stored in either host or device memory,
+location is specified by calling rocblas_set_pointer_mode.
+*/
+    pub fn rocblas_srotm(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        y: *mut f32,
+        incy: rocblas_int,
+        param: *const f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        y: *mut f64,
+        incy: rocblas_int,
+        param: *const f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotm_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        y: *mut f32,
+        incy: i64,
+        param: *const f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        y: *mut f64,
+        incy: i64,
+        param: *const f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotm_batched applies the modified Givens rotation matrix defined by param_i to batched vectors x_i and y_i, for i = 1, ..., batch_count.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in the x and y vectors.
+@param[in, out]
+x       device array of device pointers storing each vector x_i.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of each x_i.
+@param[in, out]
+y       device array of device pointers storing each vector y_i.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of each y_i.
+@param[in]
+param   device array of device vectors of 5 elements defining the rotation.
+
+param[0] = flag
+param[1] = H11
+param[2] = H21
+param[3] = H12
+param[4] = H22
+
+The flag parameter defines the form of H:
+
+flag = -1 => H = ( H11 H12 H21 H22 )
+flag =  0 => H = ( 1.0 H12 H21 1.0 )
+flag =  1 => H = ( H11 1.0 -1.0 H22 )
+flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
+
+param may ONLY be stored on the device for the batched version of this function.
+
+@param[in]
+batch_count [rocblas_int]
+the number of x and y arrays, i.e. the number of batches.
+*/
+    pub fn rocblas_srotm_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        param: *const *const f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        param: *const *const f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotm_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f32,
+        incx: i64,
+        y: *const *mut f32,
+        incy: i64,
+        param: *const *const f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const *mut f64,
+        incx: i64,
+        y: *const *mut f64,
+        incy: i64,
+        param: *const *const f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotm_strided_batched applies the modified Givens rotation matrix defined by param_i to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in the x and y vectors.
+@param[in, out]
+x       device pointer pointing to first strided batched vector x_1.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of each x_i.
+@param[in]
+stride_x [rocblas_stride]
+specifies the increment between the beginning of x_i and x_(i + 1).
+@param[in, out]
+y       device pointer pointing to first strided batched vector y_1.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of each y_i.
+@param[in]
+stride_y [rocblas_stride]
+specifies the increment between the beginning of y_i and y_(i + 1).
+@param[in]
+param   device pointer pointing to first array of 5 elements defining the rotation (param_1).
+
+param[0] = flag
+param[1] = H11
+param[2] = H21
+param[3] = H12
+param[4] = H22
+
+The flag parameter defines the form of H:
+
+flag = -1 => H = ( H11 H12 H21 H22 )
+flag =  0 => H = ( 1.0 H12 H21 1.0 )
+flag =  1 => H = ( H11 1.0 -1.0 H22 )
+flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
+
+param may ONLY be stored on the device for the strided_batched
+version of this function.
+
+@param[in]
+stride_param [rocblas_stride]
+specifies the increment between the beginning of param_i and param_(i + 1).
+@param[in]
+batch_count [rocblas_int]
+the number of x and y arrays, i.e. the number of batches.
+*/
+    pub fn rocblas_srotm_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        param: *const f32,
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_strided_batched(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        param: *const f64,
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotm_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f32,
+        incy: i64,
+        stride_y: rocblas_stride,
+        param: *const f32,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotm_strided_batched_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut f64,
+        incy: i64,
+        stride_y: rocblas_stride,
+        param: *const f64,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotmg creates the modified Givens rotation matrix for the vector (d1 * x1, d2 * y1).
+Parameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in, out]
+d1      device pointer or host pointer to input scalar that is overwritten.
+@param[in, out]
+d2      device pointer or host pointer to input scalar that is overwritten.
+@param[in, out]
+x1      device pointer or host pointer to input scalar that is overwritten.
+@param[in]
+y1      device pointer or host pointer to input scalar.
+@param[out]
+param   device vector or host vector of five elements defining the rotation.
+
+param[0] = flag
+param[1] = H11
+param[2] = H21
+param[3] = H12
+param[4] = H22
+
+The flag parameter defines the form of H:
+
+flag = -1 => H = ( H11 H12 H21 H22 )
+flag =  0 => H = ( 1.0 H12 H21 1.0 )
+flag =  1 => H = ( H11 1.0 -1.0 H22 )
+flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
+
+param may be stored in either host or device memory.
+Location is specified by calling rocblas_set_pointer_mode.
+*/
+    pub fn rocblas_srotmg(
+        handle: rocblas_handle,
+        d1: *mut f32,
+        d2: *mut f32,
+        x1: *mut f32,
+        y1: *const f32,
+        param: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg(
+        handle: rocblas_handle,
+        d1: *mut f64,
+        d2: *mut f64,
+        x1: *mut f64,
+        y1: *const f64,
+        param: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotmg_64(
+        handle: rocblas_handle,
+        d1: *mut f32,
+        d2: *mut f32,
+        x1: *mut f32,
+        y1: *const f32,
+        param: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_64(
+        handle: rocblas_handle,
+        d1: *mut f64,
+        d2: *mut f64,
+        x1: *mut f64,
+        y1: *const f64,
+        param: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotmg_batched creates the modified Givens rotation matrix for the batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.
+Parameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:
+
+- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.
+- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in, out]
+d1      device batched array or host batched array of input scalars that is overwritten.
+@param[in, out]
+d2      device batched array or host batched array of input scalars that is overwritten.
+@param[in, out]
+x1      device batched array or host batched array of input scalars that is overwritten.
+@param[in]
+y1      device batched array or host batched array of input scalars.
+@param[out]
+param   device batched array or host batched array of vectors of 5 elements defining the rotation.
+
+param[0] = flag
+param[1] = H11
+param[2] = H21
+param[3] = H12
+param[4] = H22
+
+The flag parameter defines the form of H:
+
+flag = -1 => H = ( H11 H12 H21 H22 )
+flag =  0 => H = ( 1.0 H12 H21 1.0 )
+flag =  1 => H = ( H11 1.0 -1.0 H22 )
+flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
+
+param may be stored in either host or device memory.
+Location is specified by calling rocblas_set_pointer_mode.
+
+@param[in]
+batch_count [rocblas_int]
+the number of instances in the batch.
+*/
+    pub fn rocblas_srotmg_batched(
+        handle: rocblas_handle,
+        d1: *const *mut f32,
+        d2: *const *mut f32,
+        x1: *const *mut f32,
+        y1: *const *const f32,
+        param: *const *mut f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_batched(
+        handle: rocblas_handle,
+        d1: *const *mut f64,
+        d2: *const *mut f64,
+        x1: *const *mut f64,
+        y1: *const *const f64,
+        param: *const *mut f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotmg_batched_64(
+        handle: rocblas_handle,
+        d1: *const *mut f32,
+        d2: *const *mut f32,
+        x1: *const *mut f32,
+        y1: *const *const f32,
+        param: *const *mut f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_batched_64(
+        handle: rocblas_handle,
+        d1: *const *mut f64,
+        d2: *const *mut f64,
+        x1: *const *mut f64,
+        y1: *const *const f64,
+        param: *const *mut f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 1 API </b>
+
+\details
+rotmg_strided_batched creates the modified Givens rotation matrix for the strided batched vectors (d1_i * x1_i, d2_i * y1_i), for i = 1, ..., batch_count.
+Parameters may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode:
+
+- If the pointer mode is set to rocblas_pointer_mode_host, then this function blocks the CPU until the GPU has finished and the results are available in host memory.
+- If the pointer mode is set to rocblas_pointer_mode_device, then this function returns immediately and synchronization is required to read the results.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in, out]
+d1      device strided_batched array or host strided_batched array of input scalars that is overwritten.
+@param[in]
+stride_d1 [rocblas_stride]
+specifies the increment between the beginning of d1_i and d1_(i+1).
+@param[in, out]
+d2      device strided_batched array or host strided_batched array of input scalars that is overwritten.
+@param[in]
+stride_d2 [rocblas_stride]
+specifies the increment between the beginning of d2_i and d2_(i+1).
+@param[in, out]
+x1      device strided_batched array or host strided_batched array of input scalars that is overwritten.
+@param[in]
+stride_x1 [rocblas_stride]
+specifies the increment between the beginning of x1_i and x1_(i+1).
+@param[in]
+y1      device strided_batched array or host strided_batched array of input scalars.
+@param[in]
+stride_y1 [rocblas_stride]
+specifies the increment between the beginning of y1_i and y1_(i+1).
+@param[out]
+param   device strided_batched array or host strided_batched array of vectors of 5 elements defining the rotation.
+
+param[0] = flag
+param[1] = H11
+param[2] = H21
+param[3] = H12
+param[4] = H22
+The flag parameter defines the form of H:
+
+flag = -1 => H = ( H11 H12 H21 H22 )
+flag =  0 => H = ( 1.0 H12 H21 1.0 )
+flag =  1 => H = ( H11 1.0 -1.0 H22 )
+flag = -2 => H = ( 1.0 0.0 0.0 1.0 )
+
+param may be stored in either host or device memory.
+Location is specified by calling rocblas_set_pointer_mode.
+
+@param[in]
+stride_param [rocblas_stride]
+specifies the increment between the beginning of param_i and param_(i + 1).
+@param[in]
+batch_count [rocblas_int]
+the number of instances in the batch.
+*/
+    pub fn rocblas_srotmg_strided_batched(
+        handle: rocblas_handle,
+        d1: *mut f32,
+        stride_d1: rocblas_stride,
+        d2: *mut f32,
+        stride_d2: rocblas_stride,
+        x1: *mut f32,
+        stride_x1: rocblas_stride,
+        y1: *const f32,
+        stride_y1: rocblas_stride,
+        param: *mut f32,
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_strided_batched(
+        handle: rocblas_handle,
+        d1: *mut f64,
+        stride_d1: rocblas_stride,
+        d2: *mut f64,
+        stride_d2: rocblas_stride,
+        x1: *mut f64,
+        stride_x1: rocblas_stride,
+        y1: *const f64,
+        stride_y1: rocblas_stride,
+        param: *mut f64,
+        stride_param: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_srotmg_strided_batched_64(
+        handle: rocblas_handle,
+        d1: *mut f32,
+        stride_d1: rocblas_stride,
+        d2: *mut f32,
+        stride_d2: rocblas_stride,
+        x1: *mut f32,
+        stride_x1: rocblas_stride,
+        y1: *const f32,
+        stride_y1: rocblas_stride,
+        param: *mut f32,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_drotmg_strided_batched_64(
+        handle: rocblas_handle,
+        d1: *mut f64,
+        stride_d1: rocblas_stride,
+        d2: *mut f64,
+        stride_d2: rocblas_stride,
+        x1: *mut f64,
+        stride_x1: rocblas_stride,
+        y1: *const f64,
+        stride_y1: rocblas_stride,
+        param: *mut f64,
+        stride_param: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+gbmv performs one of the matrix-vector operations:
+
+y := alpha*A*x    + beta*y,   or
+y := alpha*A**T*x + beta*y,   or
+y := alpha*A**H*x + beta*y,
+where alpha and beta are scalars, x and y are vectors and A is an
+m by n banded matrix with kl sub-diagonals and ku super-diagonals.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+trans     [rocblas_operation]
+indicates whether matrix A is transposed (conjugated) or not.
+@param[in]
+m         [rocblas_int]
+number of rows of matrix A.
+@param[in]
+n         [rocblas_int]
+number of columns of matrix A.
+@param[in]
+kl        [rocblas_int]
+number of sub-diagonals of A.
+@param[in]
+ku        [rocblas_int]
+number of super-diagonals of A.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A     device pointer storing banded matrix A.
+Leading (kl + ku + 1) by n part of the matrix contains the coefficients
+of the banded matrix. The leading diagonal resides in row (ku + 1) with
+the first super-diagonal above on the RHS of row ku. The first sub-diagonal
+resides below on the LHS of row ku + 2. This propagates up and down across
+sub/super-diagonals.
+
+Ex: (m = n = 7; ku = 2, kl = 2)
+1 2 3 0 0 0 0             0 0 3 3 3 3 3
+4 1 2 3 0 0 0             0 2 2 2 2 2 2
+5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1
+0 5 4 1 2 3 0             4 4 4 4 4 4 0
+0 0 5 4 1 2 3             5 5 5 5 5 0 0
+0 0 0 5 4 1 2             0 0 0 0 0 0 0
+0 0 0 0 5 4 1             0 0 0 0 0 0 0
+
+Note that the empty elements which do not correspond to data will not
+be referenced.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. Must be >= (kl + ku + 1).
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_sgbmv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgbmv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgbmv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgbmv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgbmv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        x: *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgbmv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        x: *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgbmv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgbmv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+gbmv_batched performs one of the matrix-vector operations:
+
+y_i := alpha*A_i*x_i    + beta*y_i,   or
+y_i := alpha*A_i**T*x_i + beta*y_i,   or
+y_i := alpha*A_i**H*x_i + beta*y_i,
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha and beta are scalars, x_i and y_i are vectors and A_i is an
+m by n banded matrix with kl sub-diagonals and ku super-diagonals,
+for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+trans     [rocblas_operation]
+indicates whether matrix A is transposed (conjugated) or not.
+@param[in]
+m         [rocblas_int]
+number of rows of each matrix A_i.
+@param[in]
+n         [rocblas_int]
+number of columns of each matrix A_i.
+@param[in]
+kl        [rocblas_int]
+number of sub-diagonals of each A_i.
+@param[in]
+ku        [rocblas_int]
+number of super-diagonals of each A_i.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A     device array of device pointers storing each banded matrix A_i.
+Leading (kl + ku + 1) by n part of the matrix contains the coefficients
+of the banded matrix. The leading diagonal resides in row (ku + 1) with
+the first super-diagonal above on the RHS of row ku. The first sub-diagonal
+resides below on the LHS of row ku + 2. This propagates up and down across
+sub/super-diagonals.
+
+Ex: (m = n = 7; ku = 2, kl = 2)
+1 2 3 0 0 0 0             0 0 3 3 3 3 3
+4 1 2 3 0 0 0             0 2 2 2 2 2 2
+5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1
+0 5 4 1 2 3 0             4 4 4 4 4 4 0
+0 0 5 4 1 2 3             5 5 5 5 5 0 0
+0 0 0 5 4 1 2             0 0 0 0 0 0 0
+0 0 0 0 5 4 1             0 0 0 0 0 0 0
+
+Note that the empty elements which do not correspond to data will not
+be referenced.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. Must be >= (kl + ku + 1).
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+specifies the number of instances in the batch.
+*/
+    pub fn rocblas_sgbmv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgbmv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgbmv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgbmv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgbmv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgbmv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgbmv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgbmv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+gbmv_strided_batched performs one of the matrix-vector operations:
+
+y_i := alpha*A_i*x_i    + beta*y_i,   or
+y_i := alpha*A_i**T*x_i + beta*y_i,   or
+y_i := alpha*A_i**H*x_i + beta*y_i,
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha and beta are scalars, x_i and y_i are vectors and A_i is an
+m by n banded matrix with kl sub-diagonals and ku super-diagonals,
+for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+trans     [rocblas_operation]
+indicates whether matrix A is transposed (conjugated) or not.
+@param[in]
+m         [rocblas_int]
+number of rows of matrix A.
+@param[in]
+n         [rocblas_int]
+number of columns of matrix A.
+@param[in]
+kl        [rocblas_int]
+number of sub-diagonals of A.
+@param[in]
+ku        [rocblas_int]
+number of super-diagonals of A.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A     device pointer to first banded matrix (A_1).
+Leading (kl + ku + 1) by n part of the matrix contains the coefficients
+of the banded matrix. The leading diagonal resides in row (ku + 1) with
+the first super-diagonal above on the RHS of row ku. The first sub-diagonal
+resides below on the LHS of row ku + 2. This propagates up and down across
+sub/super-diagonals.
+
+Ex: (m = n = 7; ku = 2, kl = 2)
+1 2 3 0 0 0 0             0 0 3 3 3 3 3
+4 1 2 3 0 0 0             0 2 2 2 2 2 2
+5 4 1 2 3 0 0    ---->    1 1 1 1 1 1 1
+0 5 4 1 2 3 0             4 4 4 4 4 4 0
+0 0 5 4 1 2 3             5 5 5 5 5 0 0
+0 0 0 5 4 1 2             0 0 0 0 0 0 0
+0 0 0 0 5 4 1             0 0 0 0 0 0 0
+
+Note that the empty elements which do not correspond to data will not
+be referenced.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. Must be >= (kl + ku + 1).
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the next one (A_i+1).
+@param[in]
+x         device pointer to first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer to first vector (y_1).
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in]
+stride_y  [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+@param[in]
+batch_count [rocblas_int]
+specifies the number of instances in the batch.
+*/
+    pub fn rocblas_sgbmv_strided_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgbmv_strided_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgbmv_strided_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgbmv_strided_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        kl: rocblas_int,
+        ku: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgbmv_strided_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgbmv_strided_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgbmv_strided_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgbmv_strided_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        kl: i64,
+        ku: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+gemv performs one of the matrix-vector operations:
+
+y := alpha*A*x    + beta*y,   or
+y := alpha*A**T*x + beta*y,   or
+y := alpha*A**H*x + beta*y,
+where alpha and beta are scalars, x and y are vectors and A is an
+m by n matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+trans     [rocblas_operation]
+indicates whether matrix A is transposed (conjugated) or not.
+@param[in]
+m         [rocblas_int]
+number of rows of matrix A.
+@param[in]
+n         [rocblas_int]
+number of columns of matrix A.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_sgemv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemv(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgemv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        x: *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        x: *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemv_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+gemv_batched performs a batch of matrix-vector operations:
+
+y_i := alpha*A_i*x_i    + beta*y_i,   or
+y_i := alpha*A_i**T*x_i + beta*y_i,   or
+y_i := alpha*A_i**H*x_i + beta*y_i,
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha and beta are scalars, x_i and y_i are vectors and A_i is an
+m by n matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle      [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+trans       [rocblas_operation]
+indicates whether matrices A_i are transposed (conjugated) or not.
+@param[in]
+m           [rocblas_int]
+number of rows of each matrix A_i.
+@param[in]
+n           [rocblas_int]
+number of columns of each matrix A_i.
+@param[in]
+alpha       device pointer or host pointer to scalar alpha.
+@param[in]
+A           device array of device pointers storing each matrix A_i.
+@param[in]
+lda         [rocblas_int]
+specifies the leading dimension of each matrix A_i.
+@param[in]
+x           device array of device pointers storing each vector x_i.
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of each vector x_i.
+@param[in]
+beta        device pointer or host pointer to scalar beta.
+@param[in, out]
+y           device array of device pointers storing each vector y_i.
+@param[in]
+incy        [rocblas_int]
+specifies the increment for the elements of each vector y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_sgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hshgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_half,
+        lda: rocblas_int,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut rocblas_half,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hssgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_half,
+        lda: rocblas_int,
+        x: *const *const rocblas_half,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tstgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_bfloat16,
+        lda: rocblas_int,
+        x: *const *const rocblas_bfloat16,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut rocblas_bfloat16,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tssgemv_batched(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_bfloat16,
+        lda: rocblas_int,
+        x: *const *const rocblas_bfloat16,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hshgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const rocblas_half,
+        lda: i64,
+        x: *const *const rocblas_half,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut rocblas_half,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hssgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const rocblas_half,
+        lda: i64,
+        x: *const *const rocblas_half,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tstgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const rocblas_bfloat16,
+        lda: i64,
+        x: *const *const rocblas_bfloat16,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut rocblas_bfloat16,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tssgemv_batched_64(
+        handle: rocblas_handle,
+        trans: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const rocblas_bfloat16,
+        lda: i64,
+        x: *const *const rocblas_bfloat16,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+gemv_strided_batched performs a batch of matrix-vector operations:
+
+y_i := alpha*A_i*x_i    + beta*y_i,   or
+y_i := alpha*A_i**T*x_i + beta*y_i,   or
+y_i := alpha*A_i**H*x_i + beta*y_i,
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha and beta are scalars, x_i and y_i are vectors and A_i is an
+m by n matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle      [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA      [rocblas_operation]
+indicates whether matrices A_i are transposed (conjugated) or not.
+@param[in]
+m           [rocblas_int]
+number of rows of matrices A_i.
+@param[in]
+n           [rocblas_int]
+number of columns of matrices A_i.
+@param[in]
+alpha       device pointer or host pointer to scalar alpha.
+@param[in]
+A           device pointer to the first matrix (A_1) in the batch.
+@param[in]
+lda         [rocblas_int]
+specifies the leading dimension of matrices A_i.
+@param[in]
+strideA     [rocblas_stride]
+stride from the start of one matrix (A_i) to the next one (A_i+1).
+@param[in]
+x           device pointer to the first vector (x_1) in the batch.
+@param[in]
+incx        [rocblas_int]
+specifies the increment for the elements of vectors x_i.
+@param[in]
+stridex     [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. When transA equals rocblas_operation_none
+this typically means stridex >= n * incx, otherwise stridex >= m * incx.
+@param[in]
+beta        device pointer or host pointer to scalar beta.
+@param[in, out]
+y           device pointer to the first vector (y_1) in the batch.
+@param[in]
+incy        [rocblas_int]
+specifies the increment for the elements of vectors y_i.
+@param[in]
+stridey     [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. When transA equals rocblas_operation_none
+this typically means stridey >= m * incy, otherwise stridey >= n * incy. stridey should be non-zero.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_sgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hshgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut rocblas_half,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hssgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_half,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tstgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_bfloat16,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut rocblas_bfloat16,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tssgemv_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_bfloat16,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_bfloat16,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hshgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const rocblas_half,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_half,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut rocblas_half,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_hssgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const rocblas_half,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_half,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tstgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const rocblas_bfloat16,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_bfloat16,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut rocblas_bfloat16,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_tssgemv_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const rocblas_bfloat16,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_bfloat16,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hbmv performs the matrix-vector operations:
+
+y := alpha*A*x + beta*y
+where alpha and beta are scalars, x and y are n element vectors and A is an
+n by n Hermitian band matrix, with k super-diagonals.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: The upper triangular part of A is being supplied.
+- rocblas_fill_lower: The lower triangular part of A is being supplied.
+@param[in]
+n         [rocblas_int]
+the order of the matrix A.
+@param[in]
+k         [rocblas_int]
+the number of super-diagonals of the matrix A. Must be >= 0.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device pointer storing matrix A. Of dimension (lda, n).
+
+if uplo == rocblas_fill_upper:
+The leading (k + 1) by n part of A must contain the upper
+triangular band part of the Hermitian matrix, with the leading
+diagonal in row (k + 1), the first super-diagonal on the RHS
+of row k, etc.
+The top left k by k triangle of A will not be referenced.
+Ex (upper, lda = n = 4, k = 1):
+A                             Represented matrix
+(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)
+(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)
+(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)
+(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)
+
+if uplo == rocblas_fill_lower:
+The leading (k + 1) by n part of A must contain the lower
+triangular band part of the Hermitian matrix, with the leading
+diagonal in row (1), the first sub-diagonal on the LHS of
+row 2, etc.
+The bottom right k by k triangle of A will not be referenced.
+Ex (lower, lda = 2, n = 4, k = 1):
+A                               Represented matrix
+(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)
+(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)
+                                (0, 0) (6, 8) (3, 0) (7,-7)
+                                (0, 0) (0, 0) (7, 7) (4, 0)
+
+As a Hermitian matrix, the imaginary part of the main diagonal
+of A will not be referenced and is assumed to be == 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. must be >= k + 1.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_chbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hbmv_batched performs one of the matrix-vector operations:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an
+n by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.
+- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.
+@param[in]
+n         [rocblas_int]
+the order of each matrix A_i.
+@param[in]
+k         [rocblas_int]
+the number of super-diagonals of each matrix A_i. Must be >= 0.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device array of device pointers storing each matrix A_i of dimension (lda, n).
+
+if uplo == rocblas_fill_upper:
+The leading (k + 1) by n part of each A_i must contain the upper
+triangular band part of the Hermitian matrix, with the leading
+diagonal in row (k + 1), the first super-diagonal on the RHS
+of row k, etc.
+The top left k by k triangle of each A_i will not be referenced.
+Ex (upper, lda = n = 4, k = 1):
+A                             Represented matrix
+(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)
+(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)
+(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)
+(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)
+
+if uplo == rocblas_fill_lower:
+The leading (k + 1) by n part of each A_i must contain the lower
+triangular band part of the Hermitian matrix, with the leading
+diagonal in row (1), the first sub-diagonal on the LHS of
+row 2, etc.
+The bottom right k by k triangle of each A_i will not be referenced.
+Ex (lower, lda = 2, n = 4, k = 1):
+A                               Represented matrix
+(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)
+(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)
+                                (0, 0) (6, 8) (3, 0) (7,-7)
+                                (0, 0) (0, 0) (7, 7) (4, 0)
+
+As a Hermitian matrix, the imaginary part of the main diagonal
+of each A_i will not be referenced and is assumed to be == 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. must be >= k + 1.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hbmv_strided_batched performs one of the matrix-vector operations:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an
+n by n Hermitian band matrix with k super-diagonals, for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: The upper triangular part of each A_i is being supplied.
+- rocblas_fill_lower: The lower triangular part of each A_i is being supplied.
+@param[in]
+n         [rocblas_int]
+the order of each matrix A_i.
+@param[in]
+k         [rocblas_int]
+the number of super-diagonals of each matrix A_i. Must be >= 0.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device array pointing to the first matrix A_1. Each A_i is of dimension (lda, n).
+
+if uplo == rocblas_fill_upper:
+The leading (k + 1) by n part of each A_i must contain the upper
+triangular band part of the Hermitian matrix, with the leading
+diagonal in row (k + 1), the first super-diagonal on the RHS
+of row k, etc.
+The top left k by k triangle of each A_i will not be referenced.
+Ex (upper, lda = n = 4, k = 1):
+A                             Represented matrix
+(0,0) (5,9) (6,8) (7,7)       (1, 0) (5, 9) (0, 0) (0, 0)
+(1,0) (2,0) (3,0) (4,0)       (5,-9) (2, 0) (6, 8) (0, 0)
+(0,0) (0,0) (0,0) (0,0)       (0, 0) (6,-8) (3, 0) (7, 7)
+(0,0) (0,0) (0,0) (0,0)       (0, 0) (0, 0) (7,-7) (4, 0)
+
+if uplo == rocblas_fill_lower:
+The leading (k + 1) by n part of each A_i must contain the lower
+triangular band part of the Hermitian matrix, with the leading
+diagonal in row (1), the first sub-diagonal on the LHS of
+row 2, etc.
+The bottom right k by k triangle of each A_i will not be referenced.
+Ex (lower, lda = 2, n = 4, k = 1):
+A                               Represented matrix
+(1,0) (2,0) (3,0) (4,0)         (1, 0) (5,-9) (0, 0) (0, 0)
+(5,9) (6,8) (7,7) (0,0)         (5, 9) (2, 0) (6,-8) (0, 0)
+                                (0, 0) (6, 8) (3, 0) (7,-7)
+                                (0, 0) (0, 0) (7, 7) (4, 0)
+
+As a Hermitian matrix, the imaginary part of the main diagonal
+of each A_i will not be referenced and is assumed to be == 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. must be >= k + 1.
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the next one (A_i+1).
+@param[in]
+x         device array pointing to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device array pointing to the first vector y_1.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y  [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hemv performs one of the matrix-vector operations:
+
+y := alpha*A*x + beta*y
+where alpha and beta are scalars, x and y are n element vectors and A is an
+n by n Hermitian matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.
+- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.
+@param[in]
+n         [rocblas_int]
+the order of the matrix A.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device pointer storing matrix A. Of dimension (lda, n).
+
+if uplo == rocblas_fill_upper:
+The upper triangular part of A must contain
+the upper triangular part of a Hermitian matrix. The lower
+triangular part of A will not be referenced.
+
+if uplo == rocblas_fill_lower:
+The lower triangular part of A must contain
+the lower triangular part of a Hermitian matrix. The upper
+triangular part of A will not be referenced.
+As a Hermitian matrix, the imaginary part of the main diagonal
+of A will not be referenced and is assumed to be == 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. must be >= max(1, n).
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_chemv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhemv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chemv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhemv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hemv_batched performs one of the matrix-vector operations:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an
+n by n Hermitian matrix, for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.
+- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.
+@param[in]
+n         [rocblas_int]
+the order of each matrix A_i.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device array of device pointers storing each matrix A_i of dimension (lda, n).
+
+if uplo == rocblas_fill_upper:
+The upper triangular part of each A_i must contain
+the upper triangular part of a Hermitian matrix. The lower
+triangular part of each A_i will not be referenced.
+
+if uplo == rocblas_fill_lower:
+The lower triangular part of each A_i must contain
+the lower triangular part of a Hermitian matrix. The upper
+triangular part of each A_i will not be referenced.
+As a Hermitian matrix, the imaginary part of the main diagonal
+of each A_i will not be referenced and is assumed to be == 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. must be >= max(1, n).
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chemv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhemv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chemv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhemv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hemv_strided_batched performs one of the matrix-vector operations:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an
+n by n Hermitian matrix, for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied.
+- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied.
+@param[in]
+n         [rocblas_int]
+the order of each matrix A_i.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+A         device pointer to the first matrix (A_1) of the batch. Each matrix A_i is of dimension (lda, n).
+
+if uplo == rocblas_fill_upper:
+The upper triangular part of each A_i must contain
+the upper triangular part of a Hermitian matrix. The lower
+triangular part of each A_i will not be referenced.
+
+if uplo == rocblas_fill_lower:
+The lower triangular part of each A_i must contain
+the lower triangular part of a Hermitian matrix. The upper
+triangular part of each A_i will not be referenced.
+As a Hermitian matrix, the imaginary part of the main diagonal
+of each A_i will not be referenced and is assumed to be == 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. must be >= max(1, n).
+@param[in]
+stride_A    [rocblas_stride]
+stride from the start of one (A_i) to the next (A_i+1).
+@param[in]
+x         device pointer to the first vector (x_1) of the batch.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer to the first vector (y_1) of the batch.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y  [rocblas_stride]
+stride from the start of one vector (y_i) and the next one (y_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chemv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhemv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chemv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhemv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+her performs the matrix-vector operations:
+
+A := A + alpha*x*x**H
+where alpha is a real scalar, x is a vector, and A is an
+n by n Hermitian matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of A is supplied:
+- rocblas_fill_upper: The upper triangular part of A is supplied in A.
+- rocblas_fill_lower: The lower triangular part of A is supplied in A.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+A         device pointer storing the specified triangular portion of the Hermitian matrix A. Of size (lda * n).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the Hermitian matrix A is supplied.
+The lower triangular portion will not be touched.
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the Hermitian matrix A is supplied.
+The upper triangular portion will not be touched.
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. Must be at least max(1, n).*/
+    pub fn rocblas_cher(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cher_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+her_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*x_i**H
+where alpha is a real scalar, x_i is a vector, and A_i is an
+n by n Hermitian matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of each A_i is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in, out]
+A         device array of device pointers storing the specified triangular portion of
+each Hermitian matrix A_i of size (lda, n). Array is of at least size batch_count.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The lower triangular portion of each A_i will not be touched.
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The upper triangular portion of each A_i will not be touched.
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. Must be at least max(1, n).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_cher_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cher_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        A: *const *mut rocblas_float_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        A: *const *mut rocblas_double_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+her_strided_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*x_i**H
+where alpha is a real scalar, x_i is a vector, and A_i is an
+n by n Hermitian matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of each A_i is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in A.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in A.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer pointing to the first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in, out]
+A         device pointer pointing to the first matrix (A_1). Stores the specified triangular portion of
+each Hermitian matrix A_i.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The lower triangular portion of each A_i will not be touched.
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The upper triangular portion of each A_i will not be touched.
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+stride_A    [rocblas_stride]
+stride from the start of one (A_i) and the next (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_cher_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cher_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+her2 performs the matrix-vector operations:
+
+A := A + alpha*x*y**H + conj(alpha)*y*x**H
+where alpha is a complex scalar, x and y are vectors, and A is an
+n by n Hermitian matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of A is supplied:
+- rocblas_fill_upper: The upper triangular part of A is supplied.
+- rocblas_fill_lower: The lower triangular part of A is supplied.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in, out]
+A         device pointer storing the specified triangular portion of
+the Hermitian matrix A. Of size (lda, n).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the Hermitian matrix A is supplied.
+The lower triangular portion of A will not be touched.
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the Hermitian matrix A is supplied.
+The upper triangular portion of A will not be touched.
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. Must be at least max(n, 1).*/
+    pub fn rocblas_cher2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cher2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+her2_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
+where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an
+n by n Hermitian matrix for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of each A_i is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in, out]
+A         device array of device pointers storing the specified triangular portion of
+each Hermitian matrix A_i of size (lda, n).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The lower triangular portion of each A_i will not be touched.
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The upper triangular portion of each A_i will not be touched.
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. Must be at least max(n, 1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_cher2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cher2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        A: *const *mut rocblas_float_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        A: *const *mut rocblas_double_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+her2_strided_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
+where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an
+n by n Hermitian matrix for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of each A_i is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer pointing to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+specifies the stride between the beginning of one vector (x_i) and the next (x_i+1).
+@param[in]
+y         device pointer pointing to the first vector y_1.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y  [rocblas_stride]
+specifies the stride between the beginning of one vector (y_i) and the next (y_i+1).
+@param[in, out]
+A         device pointer pointing to the first matrix (A_1). Stores the specified triangular portion of
+each Hermitian matrix A_i.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The lower triangular portion of each A_i will not be touched.
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The upper triangular portion of each A_i will not be touched.
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. Must be at least max(n, 1).
+@param[in]
+stride_A  [rocblas_stride]
+specifies the stride between the beginning of one matrix (A_i) and the next (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_cher2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cher2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zher2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpmv performs the matrix-vector operation:
+
+y := alpha*A*x + beta*y
+where alpha and beta are scalars, x and y are n element vectors and A is an
+n by n Hermitian matrix, supplied in packed form (see description below).
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: the upper triangular part of the Hermitian matrix A is supplied in AP.
+- rocblas_fill_lower: the lower triangular part of the Hermitian matrix A is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the order of the matrix A. Must be >= 0.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+AP        device pointer storing the packed version of the specified triangular portion of
+the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the Hermitian matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (3, 2)
+(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]
+(3,-2) (5, 1) (6, 0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the Hermitian matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (3, 2)
+(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]
+(3,-2) (5, 1) (6, 0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_chpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        AP: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        AP: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        AP: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        AP: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpmv_batched performs the matrix-vector operation:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an
+n by n Hermitian matrix, supplied in packed form (see description below),
+for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.
+- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the order of each matrix A_i.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+AP        device array of device pointers storing the packed version of the specified triangular
+portion of each Hermitian matrix A_i. Each A_i is of at least size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that each AP_i contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (3, 2)
+(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]
+(3,-2) (5, 1) (6, 0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that each AP_i contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (3, 2)
+(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]
+(3,-2) (5, 1) (6, 0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chpmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        AP: *const *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        AP: *const *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        AP: *const *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        AP: *const *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpmv_strided_batched performs the matrix-vector operation:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where alpha and beta are scalars, x_i and y_i are n element vectors and A_i is an
+n by n Hermitian matrix, supplied in packed form (see description below),
+for each batch in i = [1, batch_count].
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: the upper triangular part of each Hermitian matrix A_i is supplied in AP.
+- rocblas_fill_lower: the lower triangular part of each Hermitian matrix A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the order of each matrix A_i.
+@param[in]
+alpha     device pointer or host pointer to scalar alpha.
+@param[in]
+AP        device pointer pointing to the beginning of the first matrix (AP_1). Stores the packed
+version of the specified triangular portion of each Hermitian matrix AP_i of size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that each AP_i contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (3, 2)
+(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,1),(4,0),(3,2),(5,-1),(6,0)]
+(3,-2) (5, 1) (6, 0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that each AP_i contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (3, 2)
+(2,-1) (4, 0) (5,-1) ---> [(1,0),(2,-1),(3,-2),(4,0),(5,1),(6,0)]
+(3,-2) (5, 1) (6, 0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (AP_i) and the next one (AP_i+1).
+@param[in]
+x         device pointer pointing to the beginning of the first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device pointer pointing to the beginning of the first vector (y_1).
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y  [rocblas_stride]
+stride from the start of one vector (y_i) and the next one (y_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chpmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        AP: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        AP: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        AP: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        AP: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpr performs the matrix-vector operations:
+
+A := A + alpha*x*x**H
+where alpha is a real scalar, x is a vector, and A is an
+n by n Hermitian matrix, supplied in packed form.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper ('rocblas_fill_upper') or lower ('rocblas_fill_lower') triangular part is supplied:
+- rocblas_fill_upper: The upper triangular part of A is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of A is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the Hermitian matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]
+(4,-9) (5,-3) (6,0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the Hermitian matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]
+(4,-9) (5,-3) (6,0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.*/
+    pub fn rocblas_chpr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        AP: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        AP: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        AP: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        AP: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpr_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*x_i**H
+where alpha is a real scalar, x_i is a vector, and A_i is an
+n by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper ('rocblas_fill_upper') or lower ('rocblas_fill_lower') triangular part is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in, out]
+AP        device array of device pointers storing the packed version of the specified triangular portion of
+each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batch_count.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]
+(4,-9) (5,-3) (6,0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]
+(4,-9) (5,-3) (6,0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_chpr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        AP: *const *mut rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        AP: *const *mut rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        AP: *const *mut rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        AP: *const *mut rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpr_strided_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*x_i**H
+where alpha is a real scalar, x_i is a vector, and A_i is an
+n by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper ('rocblas_fill_upper') or lower ('rocblas_fill_lower') triangular part is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer pointing to the first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+each Hermitian matrix A_i. Points to the first matrix (A_1).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]
+(4,-9) (5,-3) (6,0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]
+(4,-9) (5,-3) (6,0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+stride_A    [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_chpr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_float_complex,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_double_complex,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_float_complex,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_double_complex,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpr2 performs the matrix-vector operations:
+
+A := A + alpha*x*y**H + conj(alpha)*y*x**H
+where alpha is a complex scalar, x and y are vectors, and A is an
+n by n Hermitian matrix, supplied in packed form.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper ('rocblas_fill_upper') or lower ('rocblas_fill_lower') triangular part is supplied:
+- rocblas_fill_upper: The upper triangular part of A is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of A is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+the Hermitian matrix A. Of at least size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the Hermitian matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]
+(4,-9) (5,-3) (6,0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the Hermitian matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]
+(4,-9) (5,-3) (6,0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.*/
+    pub fn rocblas_chpr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        AP: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        AP: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        AP: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        AP: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpr2_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
+where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an
+n by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in, out]
+AP        device array of device pointers storing the packed version of the specified triangular portion of
+each Hermitian matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batch_count.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]
+(4,-9) (5,-3) (6,0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,1), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) --> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]
+(4,-9) (5,-3) (6,0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_chpr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        AP: *const *mut rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        AP: *const *mut rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        AP: *const *mut rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        AP: *const *mut rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+hpr2_strided_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*y_i**H + conj(alpha)*y_i*x_i**H
+where alpha is a complex scalar, x_i and y_i are vectors, and A_i is an
+n by n Hermitian matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' part is supplied:
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer pointing to the first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in]
+y         device pointer pointing to the first vector (y_1).
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y  [rocblas_stride]
+stride from the start of one vector (y_i) and the next one (y_i+1).
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+each Hermitian matrix A_i. Points to the first matrix (A_1).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,1),(3,0),(4,9),(5,3),(6,0)]
+(4,-9) (5,-3) (6,0)
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each Hermitian matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 3)
+(1, 0) (2, 1) (4,9)
+(2,-1) (3, 0) (5,3) ---> [(1,0),(2,-1),(4,-9),(3,0),(5,-3),(6,0)]
+(4,-9) (5,-3) (6,0)
+Note that the imaginary part of the diagonal elements are not accessed
+and are assumed to be 0.
+@param[in]
+stride_A    [rocblas_stride]
+stride from the start of one (A_i) and the next (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_chpr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        AP: *mut rocblas_float_complex,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        AP: *mut rocblas_double_complex,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_chpr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        AP: *mut rocblas_float_complex,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zhpr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stride_y: rocblas_stride,
+        AP: *mut rocblas_double_complex,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+trmv performs one of the matrix-vector operations:
+
+x = A*x or
+x = A**T*x or
+x = A**H*x
+where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix.
+The vector x is overwritten.
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of A. n >= 0.
+
+@param[in]
+A         device pointer storing matrix A, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A, otherwise the lower triangular part of the leading n-by-n array contains the matrix A.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. lda must be at least max( 1, n ).
+
+@param[in, out]
+x         device pointer storing vector x. On exit, x is overwritten with the transformed vector x.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+*/
+    pub fn rocblas_strmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f32,
+        lda: i64,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f64,
+        lda: i64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+trmv_batched performs one of the matrix-vector operations:
+
+x_i = A_i*x_i or
+x_i = A_i**T*x_i or
+x_i = A_i**H*x_i, for i = 1, ..., batch_count
+where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix).
+The vectors x_i are overwritten.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix.
+- rocblas_fill_lower:  A_i is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of matrices A_i. n >= 0.
+
+@param[in]
+A         device pointer to an array of device pointers to the A_i matrices, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A_i. lda must be at least max( 1, n ).
+
+@param[in, out]
+x         device pointer to an array of device pointers to the x_i vectors. On exit, each x_i is overwritten with the transformed vector x_i.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of vectors x_i.
+
+@param[in]
+batch_count [rocblas_int]
+The number of batched matrices/vectors.
+
+*/
+    pub fn rocblas_strmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+trmv_strided_batched performs one of the matrix-vector operations:
+
+x_i = A_i*x_i or
+x_i = A_i**T*x_i, or
+x_i = A_i**H*x_i, for i = 1, ..., batch_count
+where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix)
+with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_{i-1}$).
+
+The vectors x_i are overwritten.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix.
+- rocblas_fill_lower:  A_i is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of matrices A_i. n >= 0.
+
+@param[in]
+A         device pointer to the matrix A_1 of the batch, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A_i. lda must be at least max( 1, n ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one A_i matrix to the next A_{i + 1}.
+
+@param[in, out]
+x         device pointer to the vector x_1 of the batch. On exit, each x_i is overwritten with the transformed vector x_i.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of one vector x.
+
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one x_i vector to the next x_{i + 1}.
+
+@param[in]
+batch_count [rocblas_int]
+The number of batched matrices/vectors.
+
+*/
+    pub fn rocblas_strmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tpmv performs one of the matrix-vector operations:
+
+x = A*x or
+x = A**T*x or
+x = A**H*x
+where x is an n element vector and A is an n by n unit, or non-unit,
+upper or lower triangular matrix, supplied in the packed form.
+The vector x is overwritten.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows of A. n >= 0.
+
+@param[in]
+A       device pointer storing matrix A,
+of dimension at least ( n * ( n + 1 ) / 2 ).
+- Before entry with uplo = rocblas_fill_upper, the array A
+must contain the upper triangular matrix packed sequentially,
+column by column, so that
+A[0] contains a_{0,0}, A[1] and A[2] contain
+a_{0,1} and a_{1, 1}, respectively, and so on.
+
+- Before entry with uplo = rocblas_fill_lower, the array A
+must contain the lower triangular matrix packed sequentially,
+column by column, so that
+A[0] contains a_{0,0}, A[1] and A[2] contain
+a_{1,0} and a_{2,0}, respectively, and so on.
+
+Note that when diag = rocblas_diagonal_unit, the diagonal elements of A are
+not referenced, but are assumed to be unity.
+
+@param[in, out]
+x      device pointer storing vector x. On exit, x is overwritten with the transformed vector x.
+
+@param[in]
+incx    [rocblas_int]
+specifies the increment for the elements of x. incx must not be zero.
+*/
+    pub fn rocblas_stpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stpmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f32,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tpmv_batched performs one of the matrix-vector operations:
+
+x_i = A_i*x_i or
+x_i = A_i**T*x_i or
+x_i = A_i**H*x_i, for i = 1, ..., batch_count
+where x_i is an n element vector and A_i is an n by n (unit, or non-unit, upper or lower triangular matrix).
+The vectors x_i are overwritten.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix.
+- rocblas_fill_lower:  A_i is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of matrices A_i. n >= 0.
+
+@param[in]
+A         device pointer to an array of device pointers to the A_i matrices, each stored in packed form with at least ( n * ( n + 1 ) / 2 ) elements. If uplo == rocblas_fill_upper, the upper triangular part of A_i is packed column by column, otherwise the lower triangular part of A_i is packed column by column.
+
+@param[in, out]
+x         device pointer to an array of device pointers to the x_i vectors. On exit, each x_i is overwritten with the transformed vector x_i.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of vectors x_i.
+
+@param[in]
+batch_count [rocblas_int]
+The number of batched matrices/vectors.
+
+*/
+    pub fn rocblas_stpmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f32,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with f64 data.
+    #[must_use]
+    pub fn rocblas_dtpmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f64,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with rocblas_float_complex data.
+    #[must_use]
+    pub fn rocblas_ctpmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with rocblas_double_complex data.
+    #[must_use]
+    pub fn rocblas_ztpmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_stpmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const f32,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with f64 data and i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_dtpmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const f64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with rocblas_float_complex data and i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ctpmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_batched, with rocblas_double_complex data and i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ztpmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tpmv_strided_batched performs one of the matrix-vector operations:
+
+x_i = A_i*x_i or
+x_i = A_i**T*x_i or
+x_i = A_i**H*x_i, 0 < i < batch_count
+where x_i is an n element vector and A_i is an n by n (unit or non-unit, upper or lower) triangular matrix,
+with strides specifying how to retrieve $x_i$ (resp. $A_i$) from $x_{i-1}$ (resp. $A_{i-1}$).
+The vectors x_i are overwritten.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix.
+- rocblas_fill_lower:  A_i is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of matrices A_i. n >= 0.
+
+@param[in]
+A       device pointer to the matrix A_1 of the batch, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one A_i matrix to the next A_{i + 1}.
+
+@param[in, out]
+x       device pointer to the vector x_1 of the batch. On exit, each x_i is overwritten with the transformed vector x_i.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of one vector x.
+
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one x_i vector to the next x_{i + 1}.
+
+@param[in]
+batch_count [rocblas_int]
+The number of batched matrices/vectors.
+
+*/
+    pub fn rocblas_stpmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with f64 data.
+    #[must_use]
+    pub fn rocblas_dtpmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with rocblas_float_complex data.
+    #[must_use]
+    pub fn rocblas_ctpmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with rocblas_double_complex data.
+    #[must_use]
+    pub fn rocblas_ztpmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_stpmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f32,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with f64 data and i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_dtpmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with rocblas_float_complex data and i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ctpmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stpmv_strided_batched, with rocblas_double_complex data and i64 n/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ztpmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tbmv performs one of the matrix-vector operations:
+
+x := A*x      or
+x := A**T*x   or
+x := A**H*x,
+where x is a vector and A is a banded n by n matrix (see description below).
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: A is an upper banded triangular matrix.
+- rocblas_fill_lower: A is a  lower banded triangular matrix.
+@param[in]
+trans     [rocblas_operation]
+indicates whether matrix A is transposed (conjugated) or not.
+@param[in]
+diag      [rocblas_diagonal]
+- rocblas_diagonal_unit: The main diagonal of A is assumed to consist of only
+1's and is not referenced.
+- rocblas_diagonal_non_unit: No assumptions are made of A's main diagonal.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of the matrix represented by A.
+@param[in]
+k         [rocblas_int]
+
+if uplo == rocblas_fill_upper, k specifies the number of super-diagonals
+of the matrix A.
+
+if uplo == rocblas_fill_lower, k specifies the number of sub-diagonals
+of the matrix A.
+k must satisfy k > 0 && k < lda.
+@param[in]
+A         device pointer storing banded triangular matrix A.
+
+if uplo == rocblas_fill_upper:
+The matrix represented is an upper banded triangular matrix
+with the main diagonal and k super-diagonals, everything
+else can be assumed to be 0.
+The matrix is compacted so that the main diagonal resides on the k'th
+row, the first super diagonal resides on the RHS of the k-1'th row, etc,
+with the k'th diagonal on the RHS of the 0'th row.
+Ex: (rocblas_fill_upper; n = 5; k = 2)
+1 6 9 0 0              0 0 9 8 7
+0 2 7 8 0              0 6 7 8 9
+0 0 3 8 7     ---->    1 2 3 4 5
+0 0 0 4 9              0 0 0 0 0
+0 0 0 0 5              0 0 0 0 0
+
+if uplo == rocblas_fill_lower:
+The matrix represented is a lower banded triangular matrix
+with the main diagonal and k sub-diagonals, everything else can be
+assumed to be 0.
+The matrix is compacted so that the main diagonal resides on the 0'th row,
+working up to the k'th diagonal residing on the LHS of the k'th row.
+Ex: (rocblas_fill_lower; n = 5; k = 2)
+1 0 0 0 0              1 2 3 4 5
+6 2 0 0 0              6 7 8 9 0
+9 7 3 0 0     ---->    9 8 7 0 0
+0 8 8 4 0              0 0 0 0 0
+0 0 7 9 5              0 0 0 0 0
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. lda must satisfy lda > k.
+@param[in, out]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+*/
+    pub fn rocblas_stbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with f64 data.
+    #[must_use]
+    pub fn rocblas_dtbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with rocblas_float_complex data.
+    #[must_use]
+    pub fn rocblas_ctbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with rocblas_double_complex data.
+    #[must_use]
+    pub fn rocblas_ztbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with i64 n/k/lda/incx.
+    #[must_use]
+    pub fn rocblas_stbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f32,
+        lda: i64,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with f64 data and i64 n/k/lda/incx.
+    #[must_use]
+    pub fn rocblas_dtbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f64,
+        lda: i64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with rocblas_float_complex data and i64 n/k/lda/incx.
+    #[must_use]
+    pub fn rocblas_ctbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv, with rocblas_double_complex data and i64 n/k/lda/incx.
+    #[must_use]
+    pub fn rocblas_ztbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tbmv_batched performs one of the matrix-vector operations:
+
+x_i := A_i*x_i      or
+x_i := A_i**T*x_i   or
+x_i := A_i**H*x_i,
+where (A_i, x_i) is the i-th instance of the batch.
+x_i is a vector and A_i is an n by n matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: each A_i is an upper banded triangular matrix.
+- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.
+@param[in]
+trans     [rocblas_operation]
+indicates whether each matrix A_i is transposed (conjugated) or not.
+@param[in]
+diag      [rocblas_diagonal]
+- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only
+1's and is not referenced.
+- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of the matrix represented by each A_i.
+@param[in]
+k         [rocblas_int]
+
+if uplo == rocblas_fill_upper, k specifies the number of super-diagonals
+of each matrix A_i.
+
+if uplo == rocblas_fill_lower, k specifies the number of sub-diagonals
+of each matrix A_i.
+k must satisfy k > 0 && k < lda.
+@param[in]
+A         device array of device pointers storing each banded triangular matrix A_i.
+
+if uplo == rocblas_fill_upper:
+The matrix represented is an upper banded triangular matrix
+with the main diagonal and k super-diagonals, everything
+else can be assumed to be 0.
+The matrix is compacted so that the main diagonal resides on the k'th
+row, the first super diagonal resides on the RHS of the k-1'th row, etc,
+with the k'th diagonal on the RHS of the 0'th row.
+Ex: (rocblas_fill_upper; n = 5; k = 2)
+1 6 9 0 0              0 0 9 8 7
+0 2 7 8 0              0 6 7 8 9
+0 0 3 8 7     ---->    1 2 3 4 5
+0 0 0 4 9              0 0 0 0 0
+0 0 0 0 5              0 0 0 0 0
+
+if uplo == rocblas_fill_lower:
+The matrix represented is a lower banded triangular matrix
+with the main diagonal and k sub-diagonals, everything else can be
+assumed to be 0.
+The matrix is compacted so that the main diagonal resides on the 0'th row,
+working up to the k'th diagonal residing on the LHS of the k'th row.
+Ex: (rocblas_fill_lower; n = 5; k = 2)
+1 0 0 0 0              1 2 3 4 5
+6 2 0 0 0              6 7 8 9 0
+9 7 3 0 0     ---->    9 8 7 0 0
+0 8 8 4 0              0 0 0 0 0
+0 0 7 9 5              0 0 0 0 0
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. lda must satisfy lda > k.
+@param[in, out]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_stbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with f64 data.
+    #[must_use]
+    pub fn rocblas_dtbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with rocblas_float_complex data.
+    #[must_use]
+    pub fn rocblas_ctbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with rocblas_double_complex data.
+    #[must_use]
+    pub fn rocblas_ztbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_stbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with f64 data and i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_dtbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with rocblas_float_complex data and i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ctbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_batched, with rocblas_double_complex data and i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ztbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tbmv_strided_batched performs one of the matrix-vector operations:
+
+x_i := A_i*x_i      or
+x_i := A_i**T*x_i   or
+x_i := A_i**H*x_i,
+where (A_i, x_i) is the i-th instance of the batch.
+x_i is a vector and A_i is an n by n matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+- rocblas_fill_upper: each A_i is an upper banded triangular matrix.
+- rocblas_fill_lower: each A_i is a  lower banded triangular matrix.
+@param[in]
+trans     [rocblas_operation]
+indicates whether each matrix A_i is transposed (conjugated) or not.
+@param[in]
+diag      [rocblas_diagonal]
+- rocblas_diagonal_unit: The main diagonal of each A_i is assumed to consist of only
+1's and is not referenced.
+- rocblas_diagonal_non_unit: No assumptions are made of each A_i's main diagonal.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of the matrix represented by each A_i.
+@param[in]
+k         [rocblas_int]
+
+if uplo == rocblas_fill_upper, k specifies the number of super-diagonals
+of each matrix A_i.
+
+if uplo == rocblas_fill_lower, k specifies the number of sub-diagonals
+of each matrix A_i.
+k must satisfy k > 0 && k < lda.
+@param[in]
+A         device array to the first matrix A_i of the batch. Stores each banded triangular matrix A_i.
+
+if uplo == rocblas_fill_upper:
+The matrix represented is an upper banded triangular matrix
+with the main diagonal and k super-diagonals, everything
+else can be assumed to be 0.
+The matrix is compacted so that the main diagonal resides on the k'th
+row, the first super diagonal resides on the RHS of the k-1'th row, etc,
+with the k'th diagonal on the RHS of the 0'th row.
+Ex: (rocblas_fill_upper; n = 5; k = 2)
+1 6 9 0 0              0 0 9 8 7
+0 2 7 8 0              0 6 7 8 9
+0 0 3 8 7     ---->    1 2 3 4 5
+0 0 0 4 9              0 0 0 0 0
+0 0 0 0 5              0 0 0 0 0
+
+if uplo == rocblas_fill_lower:
+The matrix represented is a lower banded triangular matrix
+with the main diagonal and k sub-diagonals, everything else can be
+assumed to be 0.
+The matrix is compacted so that the main diagonal resides on the 0'th row,
+working up to the k'th diagonal residing on the LHS of the k'th row.
+Ex: (rocblas_fill_lower; n = 5; k = 2)
+1 0 0 0 0              1 2 3 4 5
+6 2 0 0 0              6 7 8 9 0
+9 7 3 0 0     ---->    9 8 7 0 0
+0 8 8 4 0              0 0 0 0 0
+0 0 7 9 5              0 0 0 0 0
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. lda must satisfy lda > k.
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one A_i matrix to the next A_(i + 1).
+@param[in, out]
+x         device array to the first vector x_i of the batch.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one x_i vector to the next x_(i + 1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_stbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with f64 data.
+    #[must_use]
+    pub fn rocblas_dtbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with rocblas_float_complex data.
+    #[must_use]
+    pub fn rocblas_ctbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with rocblas_double_complex data.
+    #[must_use]
+    pub fn rocblas_ztbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_stbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with f64 data and i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_dtbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with rocblas_float_complex data and i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ctbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same signature as rocblas_stbmv_strided_batched, with rocblas_double_complex data and i64 n/k/lda/incx/batch_count.
+    #[must_use]
+    pub fn rocblas_ztbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tbsv solves:
+
+A*x = b or
+A**T*x = b or
+A**H*x = b
+where x and b are vectors and A is a banded triangular matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none: Solves A*x = b
+- rocblas_operation_transpose: Solves A**T*x = b
+- rocblas_operation_conjugate_transpose: Solves A**H*x = b
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit: A is assumed to be unit triangular (i.e. the diagonal elements
+of A are not used in computations).
+- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of b. n >= 0.
+@param[in]
+k         [rocblas_int]
+
+if(uplo == rocblas_fill_upper)
+k specifies the number of super-diagonals of A.
+if(uplo == rocblas_fill_lower)
+k specifies the number of sub-diagonals of A.
+k >= 0.
+
+@param[in]
+A         device pointer storing the matrix A in banded format.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+lda >= (k + 1).
+
+@param[in, out]
+x         device pointer storing input vector b. Overwritten by the output vector x.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+*/
+    pub fn rocblas_stbsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stbsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f32,
+        lda: i64,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f64,
+        lda: i64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tbsv_batched solves:
+
+A_i*x_i = b_i or
+A_i**T*x_i = b_i or
+A_i**H*x_i = b_i
+where x_i and b_i are vectors and A_i is a banded triangular matrix,
+for i = [1, batch_count].
+
+The input vectors b_i are overwritten by the output vectors x_i.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix.
+- rocblas_fill_lower:  A_i is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none: Solves A_i*x_i = b_i
+- rocblas_operation_transpose: Solves A_i**T*x_i = b_i
+- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements
+of each A_i are not used in computations).
+- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of each b_i. n >= 0.
+@param[in]
+k         [rocblas_int]
+
+if(uplo == rocblas_fill_upper)
+k specifies the number of super-diagonals of each A_i.
+if(uplo == rocblas_fill_lower)
+k specifies the number of sub-diagonals of each A_i.
+k >= 0.
+
+@param[in]
+A         device vector of device pointers storing each matrix A_i in banded format.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+lda >= (k + 1).
+
+@param[in, out]
+x         device vector of device pointers storing each input vector b_i. Overwritten by each output
+vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_stbsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stbsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tbsv_strided_batched solves:
+
+A_i*x_i = b_i or
+A_i**T*x_i = b_i or
+A_i**H*x_i = b_i
+where x_i and b_i are vectors and A_i is a banded triangular matrix,
+for i = [1, batch_count].
+
+The input vectors b_i are overwritten by the output vectors x_i.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix.
+- rocblas_fill_lower:  A_i is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none: Solves A_i*x_i = b_i
+- rocblas_operation_transpose: Solves A_i**T*x_i = b_i
+- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements
+of each A_i are not used in computations).
+- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of each b_i. n >= 0.
+@param[in]
+k         [rocblas_int]
+
+if(uplo == rocblas_fill_upper)
+k specifies the number of super-diagonals of each A_i.
+if(uplo == rocblas_fill_lower)
+k specifies the number of sub-diagonals of each A_i.
+k >= 0.
+
+@param[in]
+A         device pointer pointing to the first banded matrix A_1.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+lda >= (k + 1).
+@param[in]
+stride_A  [rocblas_stride]
+specifies the distance between the start of one matrix (A_i) and the next (A_i+1).
+
+@param[in, out]
+x         device pointer pointing to the first input vector b_1. Overwritten by the output vectors x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+specifies the distance between the start of one vector (x_i) and the next (x_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_stbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        k: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stbsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtbsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctbsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztbsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        k: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 2 API </b>
+
+\details
+trsv solves:
+
+A*x = b or
+A**T*x = b or
+A**H*x = b,
+where x and b are vectors and A is a triangular matrix.
+The input vector b is overwritten by the output vector x.
+
+Although not widespread, some gemm kernels used by trsv may use atomic operations.
+See Atomic Operations in the API Reference Guide for more information.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of b. n >= 0.
+
+@param[in]
+A         device pointer storing matrix A, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A, otherwise the lower triangular part of the leading n-by-n array contains the matrix A.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. lda must be at least max( 1, n ).
+
+@param[in, out]
+x         device pointer storing vector x. On exit, x is overwritten with the transformed vector x.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+*/
+    pub fn rocblas_strsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f32,
+        lda: i64,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f64,
+        lda: i64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 2 API </b>
+
+\details
+trsv_batched solves:
+
+A_i*x_i = b_i or
+A_i**T*x_i = b_i or
+A_i**H*x_i = b_i,
+where (A_i, x_i, b_i) is the i-th instance of the batch.
+x_i and b_i are vectors and A_i is an
+n by n triangular matrix.
+
+Each input vector b_i is overwritten by the output vector x_i.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of b. n >= 0.
+
+@param[in]
+A         device pointer to an array of device pointers to the A_i matrices, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A_i. lda must be at least max( 1, n ).
+
+@param[in, out]
+x         device pointer to an array of device pointers to the x_i vectors. On exit, each x_i is overwritten with the transformed vector x_i.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_strsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 2 API </b>
+
+\details
+trsv_strided_batched solves:
+
+A_i*x_i = b_i or
+A_i**T*x_i = b_i or
+A_i**H*x_i = b_i,
+where (A_i, x_i, b_i) is the i-th instance of the batch.
+x_i and b_i are vectors and A_i is an n by n triangular matrix, for i = 1, ..., batch_count.
+
+Each input vector b_i is overwritten by the output vector x_i.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA     [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:   op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of each b_i. n >= 0.
+
+@param[in]
+A         device pointer to the matrix A_1 of the batch, of dimension ( lda, n ). If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix A_i, otherwise the lower triangular part of the leading n-by-n array contains the matrix A_i.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A_i. lda must be at least max( 1, n ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one A_i matrix to the next A_(i + 1).
+
+@param[in, out]
+x         device pointer to the vector x_1 of the batch. On exit, each x_i is overwritten with the transformed vector x_i.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+
+@param[in]
+stride_x [rocblas_stride]
+stride from the start of one x_i vector to the next x_(i + 1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_strsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /**
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tpsv solves:
+
+A*x = b or
+A**T*x = b or
+A**H*x = b
+where x and b are vectors and A is a triangular matrix stored in the packed format.
+
+The input vector b is overwritten by the output vector x.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none: Solves A*x = b
+- rocblas_operation_transpose: Solves A**T*x = b
+- rocblas_operation_conjugate_transpose: Solves A**H*x = b
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:  A is assumed to be unit triangular (i.e. the diagonal elements
+of A are not used in computations).
+- rocblas_diagonal_non_unit: A is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of b. n >= 0.
+
+@param[in]
+AP        device pointer storing the packed version of matrix A,
+of dimension >= (n * (n + 1) / 2).
+
+@param[in, out]
+x         device pointer storing vector b on input, overwritten by x on output.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+*/
+    pub fn rocblas_stpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f32,
+        x: *mut f32,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f64,
+        x: *mut f64,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpsv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stpsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const f32,
+        x: *mut f32,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const f64,
+        x: *mut f64,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const rocblas_float_complex,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpsv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const rocblas_double_complex,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tpsv_batched solves:
+
+A_i*x_i = b_i or
+A_i**T*x_i = b_i or
+A_i**H*x_i = b_i
+where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,
+for i in [1, batch_count].
+
+The input vectors b_i are overwritten by the output vectors x_i.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  each A_i is an upper triangular matrix.
+- rocblas_fill_lower:  each A_i is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none: Solves A_i*x_i = b_i
+- rocblas_operation_transpose: Solves A_i**T*x_i = b_i
+- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit: Each A_i is assumed to be unit triangular (i.e. the diagonal elements
+of each A_i are not used in computations).
+- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of each b_i. n >= 0.
+
+@param[in]
+AP        device array of device pointers storing the packed versions of each matrix A_i,
+of dimension >= (n * (n + 1) / 2).
+
+@param[in, out]
+x         device array of device pointers storing each input vector b_i, overwritten by x_i on output.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+batch_count [rocblas_int]
+specifies the number of instances in the batch.
+*/
+    pub fn rocblas_stpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const f32,
+        x: *const *mut f32,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const f64,
+        x: *const *mut f64,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpsv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stpsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const *const f32,
+        x: *const *mut f32,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const *const f64,
+        x: *const *mut f64,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const *const rocblas_float_complex,
+        x: *const *mut rocblas_float_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpsv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const *const rocblas_double_complex,
+        x: *const *mut rocblas_double_complex,
+        incx: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+tpsv_strided_batched solves:
+
+A_i*x_i = b_i or
+A_i**T*x_i = b_i or
+A_i**H*x_i = b_i
+where x_i and b_i are vectors and A_i is a triangular matrix stored in the packed format,
+for i in [1, batch_count].
+
+The input vectors b_i are overwritten by the output vectors x_i.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  each A_i is an upper triangular matrix.
+- rocblas_fill_lower:  each A_i is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none: Solves A_i*x_i = b_i
+- rocblas_operation_transpose: Solves A_i**T*x_i = b_i
+- rocblas_operation_conjugate_transpose: Solves A_i**H*x_i = b_i
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular (i.e. the diagonal elements
+of each A_i are not used in computations).
+- rocblas_diagonal_non_unit: each A_i is not assumed to be unit triangular.
+
+@param[in]
+n         [rocblas_int]
+n specifies the number of rows of each b_i. n >= 0.
+
+@param[in]
+AP        device pointer pointing to the first packed matrix A_1,
+of dimension >= (n * (n + 1) / 2).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the beginning of one packed matrix (AP_i) to the beginning of the next (AP_i+1).
+
+@param[in, out]
+x         device pointer pointing to the first input vector b_1. Overwritten by each x_i on output.
+
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the beginning of one vector (x_i) to the beginning of the next (x_i+1).
+@param[in]
+batch_count [rocblas_int]
+specifies the number of instances in the batch.
+*/
+    pub fn rocblas_stpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f32,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const f64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpsv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        AP: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_stpsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const f32,
+        stride_A: rocblas_stride,
+        x: *mut f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtpsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const f64,
+        stride_A: rocblas_stride,
+        x: *mut f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctpsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const rocblas_float_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztpsv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        n: i64,
+        AP: *const rocblas_double_complex,
+        stride_A: rocblas_stride,
+        x: *mut rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+symv performs the matrix-vector operation:
+
+y := alpha*A*x + beta*y
+where alpha and beta are scalars, x and y are n element vectors and
+A should contain an upper or lower triangular n by n symmetric matrix.
+
+symv has an implementation which uses atomic operations. See Atomic Operations
+in the API Reference Guide for more information.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo     [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of A is referenced:
+- if rocblas_fill_upper, the lower part of A is not referenced.
+- if rocblas_fill_lower, the upper part of A is not referenced.
+@param[in]
+n         [rocblas_int] the number of rows and columns of matrix A.
+@param[in]
+alpha
+specifies the scalar alpha.
+@param[in]
+A         pointer storing matrix A on the GPU
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+x         pointer storing vector x on the GPU.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+beta      specifies the scalar beta
+@param[in, out]
+y         pointer storing vector y on the GPU.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+*/
+    pub fn rocblas_ssymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssymv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        x: *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        x: *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+symv_batched performs the matrix-vector operation:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha and beta are scalars, x_i and y_i are vectors and A_i is an
+n by n symmetric matrix, for i = 1, ..., batch_count.
+Each A_i should contain an upper or lower triangular symmetric matrix
+and the opposing triangular part of each A_i is not referenced.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of each A_i is referenced:
+- if rocblas_fill_upper, the lower part of A is not referenced.
+- if rocblas_fill_lower, the upper part of A is not referenced.
+@param[in]
+n         [rocblas_int]
+number of rows and columns of each matrix A_i.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+A         device array of device pointers storing each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each matrix A_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each vector x_i.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each vector y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssymv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        beta: *const rocblas_float_complex,
+        y: *const *mut rocblas_float_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        beta: *const rocblas_double_complex,
+        y: *const *mut rocblas_double_complex,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+symv_strided_batched performs the matrix-vector operation:
+
+y_i := alpha*A_i*x_i + beta*y_i
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha and beta are scalars, x_i and y_i are vectors and A_i is an
+n by n symmetric matrix, for i = 1, ..., batch_count.
+Each A_i should contain an upper or lower triangular symmetric matrix
+and the opposing triangular part of each A_i is not referenced.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower' triangular part of each A_i is referenced:
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+@param[in]
+n         [rocblas_int]
+number of rows and columns of each matrix A_i.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+A         Device pointer to the first matrix A_1 on the GPU.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each matrix A_i.
+@param[in]
+strideA     [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
+@param[in]
+x         Device pointer to the first vector x_1 on the GPU.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each vector x_i.
+@param[in]
+stridex     [rocblas_stride]
+stride from the start of one vector (x_i) to the start of the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size.
+This typically means stridex >= n * incx. stridex should be non zero.
+@param[in]
+beta      device pointer or host pointer to scalar beta.
+@param[in, out]
+y         Device pointer to the first vector y_1 on the GPU.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each vector y_i.
+@param[in]
+stridey     [rocblas_stride]
+stride from the start of one vector (y_i) to the start of the next one (y_i+1).
+There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size.
+This typically means stridey >= n * incy. stridey should be non zero.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssymv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of symv_strided_batched taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dsymv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_float_complex` variant of symv_strided_batched taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_csymv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        y: *mut rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_double_complex` variant of symv_strided_batched taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_zsymv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        y: *mut rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
spmv performs the matrix-vector operation:

y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and
A should contain an upper or lower triangular n by n packed symmetric matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
uplo      [rocblas_fill]
specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
- if rocblas_fill_upper, the lower part of A is not referenced
- if rocblas_fill_lower, the upper part of A is not referenced
@param[in]
n         [rocblas_int] the number of rows and columns of matrix A.
@param[in]
alpha
specifies the scalar alpha.
@param[in]
A         pointer storing matrix A on the GPU.
@param[in]
x         pointer storing vector x on the GPU.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of x.
@param[in]
beta      specifies the scalar beta.
@param[out]
y         pointer storing vector y on the GPU.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of y.
*/
+    pub fn rocblas_sspmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of spmv (y := alpha*A*x + beta*y, packed symmetric A); `rocblas_int` sizes and increments.
+    #[must_use]
+    pub fn rocblas_dspmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of spmv (y := alpha*A*x + beta*y, packed symmetric A) taking `i64` size and increments.
+    #[must_use]
+    pub fn rocblas_sspmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        x: *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of spmv (y := alpha*A*x + beta*y, packed symmetric A) taking `i64` size and increments.
+    #[must_use]
+    pub fn rocblas_dspmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        x: *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
spmv_batched performs the matrix-vector operation:

y_i := alpha*A_i*x_i + beta*y_i
where (A_i, x_i, y_i) is the i-th instance of the batch.
alpha and beta are scalars, x_i and y_i are vectors and A_i is an
n by n symmetric matrix, for i = 1, ..., batch_count.
Each A_i should contain an upper or lower triangular n by n packed symmetric matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
uplo      [rocblas_fill]
specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
- if rocblas_fill_upper, the lower part of A is not referenced
- if rocblas_fill_lower, the upper part of A is not referenced
@param[in]
n         [rocblas_int]
number of rows and columns of each matrix A_i.
@param[in]
alpha
device pointer or host pointer to scalar alpha.
@param[in]
A         device array of device pointers storing each matrix A_i.
@param[in]
x         device array of device pointers storing each vector x_i.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of each vector x_i.
@param[in]
beta      device pointer or host pointer to scalar beta.
@param[out]
y         device array of device pointers storing each vector y_i.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of each vector y_i.
@param[in]
batch_count [rocblas_int]
number of instances in the batch.
*/
+    pub fn rocblas_sspmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of spmv_batched (A, x, y are arrays of pointers); `rocblas_int` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dspmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of spmv_batched (A, x, y are arrays of pointers) taking `i64` size, increments and batch count.
+    #[must_use]
+    pub fn rocblas_sspmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        x: *const *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of spmv_batched (A, x, y are arrays of pointers) taking `i64` size, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dspmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        x: *const *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
spmv_strided_batched performs the matrix-vector operation:

y_i := alpha*A_i*x_i + beta*y_i
where (A_i, x_i, y_i) is the i-th instance of the batch.
alpha and beta are scalars, x_i and y_i are vectors and A_i is an
n by n symmetric matrix, for i = 1, ..., batch_count.
Each A_i should contain an upper or lower triangular n by n packed symmetric matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
uplo      [rocblas_fill]
specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
- if rocblas_fill_upper, the lower part of A is not referenced
- if rocblas_fill_lower, the upper part of A is not referenced
@param[in]
n         [rocblas_int]
number of rows and columns of each matrix A_i.
@param[in]
alpha
device pointer or host pointer to scalar alpha.
@param[in]
A         Device pointer to the first matrix A_1 on the GPU.
@param[in]
strideA     [rocblas_stride]
stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
@param[in]
x         Device pointer to the first vector x_1 on the GPU.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of each vector x_i.
@param[in]
stridex     [rocblas_stride]
stride from the start of one vector (x_i) to the start of the next one (x_i+1).
There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size.
This typically means stridex >= n * incx. stridex should be non zero.
@param[in]
beta      device pointer or host pointer to scalar beta.
@param[out]
y         Device pointer to the first vector y_1 on the GPU.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of each vector y_i.
@param[in]
stridey     [rocblas_stride]
stride from the start of one vector (y_i) to the start of the next one (y_i+1).
There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size.
This typically means stridey >= n * incy. stridey should be non zero.
@param[in]
batch_count [rocblas_int]
number of instances in the batch.
*/
+    pub fn rocblas_sspmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of spmv_strided_batched; `rocblas_int` size, increments and batch count, `rocblas_stride` strides.
+    #[must_use]
+    pub fn rocblas_dspmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of spmv_strided_batched taking `i64` size, increments and batch count.
+    #[must_use]
+    pub fn rocblas_sspmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of spmv_strided_batched taking `i64` size, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dspmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
sbmv performs the matrix-vector operation:

y := alpha*A*x + beta*y
where alpha and beta are scalars, x and y are n element vectors and
A should contain an upper or lower triangular n by n symmetric banded matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
uplo      [rocblas_fill]
specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
- if rocblas_fill_upper, the lower part of A is not referenced
- if rocblas_fill_lower, the upper part of A is not referenced
@param[in]
n         [rocblas_int] the number of rows and columns of matrix A.
@param[in]
k         [rocblas_int]
specifies the number of sub- and super-diagonals.
@param[in]
alpha
specifies the scalar alpha.
@param[in]
A         pointer storing matrix A on the GPU.
@param[in]
lda       [rocblas_int]
specifies the leading dimension of matrix A.
@param[in]
x         pointer storing vector x on the GPU.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of x.
@param[in]
beta      specifies the scalar beta.
@param[out]
y         pointer storing vector y on the GPU.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of y.
*/
+    pub fn rocblas_ssbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of sbmv (y := alpha*A*x + beta*y, symmetric banded A); `rocblas_int` sizes and increments.
+    #[must_use]
+    pub fn rocblas_dsbmv(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of sbmv (y := alpha*A*x + beta*y, symmetric banded A) taking `i64` sizes and increments.
+    #[must_use]
+    pub fn rocblas_ssbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        x: *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of sbmv (y := alpha*A*x + beta*y, symmetric banded A) taking `i64` sizes and increments.
+    #[must_use]
+    pub fn rocblas_dsbmv_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        x: *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
sbmv_batched performs the matrix-vector operation:

y_i := alpha*A_i*x_i + beta*y_i
where (A_i, x_i, y_i) is the i-th instance of the batch.
alpha and beta are scalars, x_i and y_i are vectors and A_i is an
n by n symmetric banded matrix, for i = 1, ..., batch_count.
Each A_i should contain an upper or lower triangular n by n symmetric banded matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
uplo      [rocblas_fill]
specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
- if rocblas_fill_upper, the lower part of A is not referenced
- if rocblas_fill_lower, the upper part of A is not referenced
@param[in]
n         [rocblas_int]
number of rows and columns of each matrix A_i.
@param[in]
k         [rocblas_int]
specifies the number of sub- and super-diagonals.
@param[in]
alpha
device pointer or host pointer to scalar alpha.
@param[in]
A         device array of device pointers storing each matrix A_i.
@param[in]
lda       [rocblas_int]
specifies the leading dimension of each matrix A_i.
@param[in]
x         device array of device pointers storing each vector x_i.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of each vector x_i.
@param[in]
beta      device pointer or host pointer to scalar beta.
@param[out]
y         device array of device pointers storing each vector y_i.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of each vector y_i.
@param[in]
batch_count [rocblas_int]
number of instances in the batch.
*/
+    pub fn rocblas_ssbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of sbmv_batched (A, x, y are arrays of pointers); `rocblas_int` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dsbmv_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of sbmv_batched (A, x, y are arrays of pointers) taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_ssbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *const f32,
+        incx: i64,
+        beta: *const f32,
+        y: *const *mut f32,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of sbmv_batched (A, x, y are arrays of pointers) taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dsbmv_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *const f64,
+        incx: i64,
+        beta: *const f64,
+        y: *const *mut f64,
+        incy: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
sbmv_strided_batched performs the matrix-vector operation:

y_i := alpha*A_i*x_i + beta*y_i
where (A_i, x_i, y_i) is the i-th instance of the batch.
alpha and beta are scalars, x_i and y_i are vectors and A_i is an
n by n symmetric banded matrix, for i = 1, ..., batch_count.
Each A_i should contain an upper or lower triangular n by n symmetric banded matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
uplo      [rocblas_fill]
specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
- if rocblas_fill_upper, the lower part of A is not referenced
- if rocblas_fill_lower, the upper part of A is not referenced
@param[in]
n         [rocblas_int]
number of rows and columns of each matrix A_i.
@param[in]
k         [rocblas_int]
specifies the number of sub- and super-diagonals.
@param[in]
alpha
device pointer or host pointer to scalar alpha.
@param[in]
A         Device pointer to the first matrix A_1 on the GPU.
@param[in]
lda       [rocblas_int]
specifies the leading dimension of each matrix A_i.
@param[in]
strideA     [rocblas_stride]
stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
@param[in]
x         Device pointer to the first vector x_1 on the GPU.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of each vector x_i.
@param[in]
stridex     [rocblas_stride]
stride from the start of one vector (x_i) to the start of the next one (x_i+1).
There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size.
This typically means stridex >= n * incx. stridex should be non zero.
@param[in]
beta      device pointer or host pointer to scalar beta.
@param[out]
y         Device pointer to the first vector y_1 on the GPU.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of each vector y_i.
@param[in]
stridey     [rocblas_stride]
stride from the start of one vector (y_i) to the start of the next one (y_i+1).
There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size.
This typically means stridey >= n * incy. stridey should be non zero.
@param[in]
batch_count [rocblas_int]
number of instances in the batch.
*/
+    pub fn rocblas_ssbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of sbmv_strided_batched; `rocblas_int` sizes, increments and batch count, `rocblas_stride` strides.
+    #[must_use]
+    pub fn rocblas_dsbmv_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of sbmv_strided_batched taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_ssbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f32,
+        y: *mut f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of sbmv_strided_batched taking `i64` sizes, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dsbmv_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        strideA: rocblas_stride,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        beta: *const f64,
+        y: *mut f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
ger, geru and gerc perform the matrix-vector operations:

A := A + alpha*x*y**T , OR
A := A + alpha*x*y**H for gerc
where alpha is a scalar, x and y are vectors, and A is an
m by n matrix.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
m         [rocblas_int]
the number of rows of the matrix A.
@param[in]
n         [rocblas_int]
the number of columns of the matrix A.
@param[in]
alpha
device pointer or host pointer to scalar alpha.
@param[in]
x         device pointer storing vector x.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of x.
@param[in]
y         device pointer storing vector y.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of y.
@param[in, out]
A         device pointer storing matrix A.
@param[in]
lda       [rocblas_int]
specifies the leading dimension of A.
*/
+    pub fn rocblas_sger(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        A: *mut f32,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of ger (A := A + alpha*x*y**T); `rocblas_int` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_dger(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *const f64,
+        incy: rocblas_int,
+        A: *mut f64,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_float_complex` geru (A := A + alpha*x*y**T); `rocblas_int` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_cgeru(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_double_complex` geru (A := A + alpha*x*y**T); `rocblas_int` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_zgeru(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_float_complex` gerc (A := A + alpha*x*y**H); `rocblas_int` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_cgerc(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_double_complex` gerc (A := A + alpha*x*y**H); `rocblas_int` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_zgerc(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f32` variant of ger (A := A + alpha*x*y**T) taking `i64` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_sger_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        y: *const f32,
+        incy: i64,
+        A: *mut f32,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of ger (A := A + alpha*x*y**T) taking `i64` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_dger_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        y: *const f64,
+        incy: i64,
+        A: *mut f64,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_float_complex` geru (A := A + alpha*x*y**T) taking `i64` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_cgeru_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_double_complex` geru (A := A + alpha*x*y**T) taking `i64` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_zgeru_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_float_complex` gerc (A := A + alpha*x*y**H) taking `i64` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_cgerc_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_double_complex` gerc (A := A + alpha*x*y**H) taking `i64` dimensions and increments.
+    #[must_use]
+    pub fn rocblas_zgerc_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
\brief <b> BLAS Level 2 API </b>

\details
ger_batched,geru_batched,gerc_batched perform a batch of the matrix-vector operations:

A_i := A_i + alpha*x_i*y_i**T , OR
A_i := A_i + alpha*x_i*y_i**H for gerc
where (A_i, x_i, y_i) is the i-th instance of the batch.
alpha is a scalar, x_i and y_i are vectors and A_i is an
m by n matrix, for i = 1, ..., batch_count.

@param[in]
handle    [rocblas_handle]
handle to the rocblas library context queue.
@param[in]
m         [rocblas_int]
the number of rows of each matrix A_i.
@param[in]
n         [rocblas_int]
the number of columns of each matrix A_i.
@param[in]
alpha
device pointer or host pointer to scalar alpha.
@param[in]
x         device array of device pointers storing each vector x_i.
@param[in]
incx      [rocblas_int]
specifies the increment for the elements of each vector x_i.
@param[in]
y         device array of device pointers storing each vector y_i.
@param[in]
incy      [rocblas_int]
specifies the increment for the elements of each vector y_i.
@param[in, out]
A         device array of device pointers storing each matrix A_i.
@param[in]
lda       [rocblas_int]
specifies the leading dimension of each A_i.
@param[in]
batch_count [rocblas_int]
number of instances in the batch.
*/
+    pub fn rocblas_sger_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *const f32,
+        incy: rocblas_int,
+        A: *const *mut f32,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `f64` variant of ger_batched (x, y, A are arrays of pointers); `rocblas_int` dimensions, increments and batch count.
+    #[must_use]
+    pub fn rocblas_dger_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *const f64,
+        incy: rocblas_int,
+        A: *const *mut f64,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // `rocblas_float_complex` geru_batched (y**T form; x, y, A are arrays of pointers); `rocblas_int` sizes.
+    #[must_use]
+    pub fn rocblas_cgeru_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeru_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgerc_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgerc_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sger_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *const f32,
+        incy: i64,
+        A: *const *mut f32,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dger_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *const f64,
+        incy: i64,
+        A: *const *mut f64,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeru_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        A: *const *mut rocblas_float_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeru_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        A: *const *mut rocblas_double_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgerc_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        A: *const *mut rocblas_float_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgerc_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        A: *const *mut rocblas_double_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+ger_strided_batched,geru_strided_batched,gerc_strided_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*y_i**T, OR
+A_i := A_i + alpha*x_i*y_i**H  for gerc
+where (A_i, x_i, y_i) is the i-th instance of the batch.
+alpha is a scalar, x_i and y_i are vectors and A_i is an
+m by n matrix, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+m         [rocblas_int]
+the number of rows of each matrix A_i.
+@param[in]
+n         [rocblas_int]
+the number of columns of each matrix A_i.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer to the first vector (x_1) in the batch.
+@param[in]
+incx      [rocblas_int]
+specifies the increments for the elements of each vector x_i.
+@param[in]
+stridex   [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical
+case this means stridex >= m * incx.
+@param[in]
+y         device pointer to the first vector (y_1) in the batch.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each vector y_i.
+@param[in]
+stridey   [rocblas_stride]
+stride from the start of one vector (y_i) and the next one (y_i+1).
+There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. For a typical
+case this means stridey >= n * incy.
+@param[in, out]
+A         device pointer to the first matrix (A_1) in the batch.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+strideA     [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_sger_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dger_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeru_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeru_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgerc_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgerc_strided_batched(
+        handle: rocblas_handle,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sger_strided_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut f32,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dger_strided_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut f64,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeru_strided_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeru_strided_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgerc_strided_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgerc_strided_batched_64(
+        handle: rocblas_handle,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+spr performs the matrix-vector operations:
+
+A := A + alpha*x*x**T
+where alpha is a scalar, x is a vector, and A is an
+n by n symmetric matrix, supplied in packed form.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of A is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of A is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+the symmetric matrix A. Of at least size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the symmetric matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 4)
+1 2 4 7
+2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+4 5 6 9
+7 8 9 0
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the symmetric matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 4)
+1 2 3 4
+2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+3 6 8 9
+4 7 9 0*/
+    pub fn rocblas_sspr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        AP: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        AP: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        AP: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        AP: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sspr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        AP: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        AP: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        AP: *mut rocblas_float_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        AP: *mut rocblas_double_complex,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+spr_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*x_i**T
+where alpha is a scalar, x_i is a vector, and A_i is an
+n by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in, out]
+AP        device array of device pointers storing the packed version of the specified triangular portion of
+each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batch_count.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 4)
+1 2 4 7
+2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+4 5 6 9
+7 8 9 0
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 4)
+1 2 3 4
+2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+3 6 8 9
+4 7 9 0
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_sspr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        AP: *const *mut f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        AP: *const *mut f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        AP: *const *mut rocblas_float_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        AP: *const *mut rocblas_double_complex,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sspr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        AP: *const *mut f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        AP: *const *mut f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        AP: *const *mut rocblas_float_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        AP: *const *mut rocblas_double_complex,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+spr_strided_batched performs the matrix-vector operations:
+
+A_i := A_i + alpha*x_i*x_i**T
+where alpha is a scalar, x_i is a vector, and A_i is an
+n by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer pointing to the first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+each symmetric matrix A_i. Points to the first A_1.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 4)
+1 2 4 7
+2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+4 5 6 9
+7 8 9 0
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(2) = A(2,0), etc.
+Ex: (rocblas_fill_lower; n = 4)
+1 2 3 4
+2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+3 6 8 9
+4 7 9 0
+@param[in]
+stride_A    [rocblas_stride]
+stride from the start of one (A_i) and the next (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_sspr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut f32,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut f64,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_float_complex,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_double_complex,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sspr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        AP: *mut f32,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        AP: *mut f64,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cspr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_float_complex,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zspr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        AP: *mut rocblas_double_complex,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+spr2 performs the matrix-vector operation:
+
+A := A + alpha*x*y**T + alpha*y*x**T
+where alpha is a scalar, x and y are vectors, and A is an
+n by n symmetric matrix, supplied in packed form.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of A is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of A is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+the symmetric matrix A. Of at least size ((n * (n + 1)) / 2).
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of the symmetric matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 4)
+1 2 4 7
+2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+4 5 6 9
+7 8 9 0
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of the symmetric matrix A is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(n) = A(1,1), etc.
+Ex: (rocblas_fill_lower; n = 4)
+1 2 3 4
+2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+3 6 8 9
+4 7 9 0*/
+    pub fn rocblas_sspr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        AP: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *const f64,
+        incy: rocblas_int,
+        AP: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sspr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        y: *const f32,
+        incy: i64,
+        AP: *mut f32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        y: *const f64,
+        incy: i64,
+        AP: *mut f64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+spr2_batched performs the matrix-vector operation:
+
+A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T
+where alpha is a scalar, x_i and y_i are vectors, and A_i is an
+n by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in, out]
+AP        device array of device pointers storing the packed version of the specified triangular portion of
+each symmetric matrix A_i of at least size ((n * (n + 1)) / 2). Array is of at least size batch_count.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 4)
+1 2 4 7
+2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+4 5 6 9
+7 8 9 0
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(n) = A(1,1), etc.
+Ex: (rocblas_fill_lower; n = 4)
+1 2 3 4
+2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+3 6 8 9
+4 7 9 0
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_sspr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *const f32,
+        incy: rocblas_int,
+        AP: *const *mut f32,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *const f64,
+        incy: rocblas_int,
+        AP: *const *mut f64,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sspr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *const f32,
+        incy: i64,
+        AP: *const *mut f32,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *const f64,
+        incy: i64,
+        AP: *const *mut f64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+spr2_strided_batched performs the matrix-vector operation:
+
+A_i := A_i + alpha*x_i*y_i**T + alpha*y_i*x_i**T
+where alpha is a scalar, x_i and y_i are vectors, and A_i is an
+n by n symmetric matrix, supplied in packed form, for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- rocblas_fill_upper: The upper triangular part of each A_i is supplied in AP.
+- rocblas_fill_lower: The lower triangular part of each A_i is supplied in AP.
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A_i. Must be at least 0.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer pointing to the first vector (x_1).
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) to the start of the next one (x_i+1).
+@param[in]
+y         device pointer pointing to the first vector (y_1).
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y  [rocblas_stride]
+stride from the start of one vector (y_i) to the start of the next one (y_i+1).
+@param[in, out]
+AP        device pointer storing the packed version of the specified triangular portion of
+each symmetric matrix A_i. Points to the first A_1.
+
+if uplo == rocblas_fill_upper:
+The upper triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(0,1)
+AP(2) = A(1,1), etc.
+Ex: (rocblas_fill_upper; n = 4)
+1 2 4 7
+2 3 5 8   -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+4 5 6 9
+7 8 9 0
+
+if uplo == rocblas_fill_lower:
+The lower triangular portion of each symmetric matrix A_i is supplied.
+The matrix is compacted so that AP contains the triangular portion
+column-by-column
+so that:
+AP(0) = A(0,0)
+AP(1) = A(1,0)
+AP(n) = A(1,1), etc.
+Ex: (rocblas_fill_lower; n = 4)
+1 2 3 4
+2 5 6 7    -----> [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
+3 6 8 9
+4 7 9 0
+@param[in]
+stride_A    [rocblas_stride]
+stride from the start of one (A_i) to the start of the next (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.*/
+    pub fn rocblas_sspr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const f32,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        AP: *mut f32,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const f64,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        AP: *mut f64,
+        stride_A: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sspr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const f32,
+        incy: i64,
+        stride_y: rocblas_stride,
+        AP: *mut f32,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dspr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const f64,
+        incy: i64,
+        stride_y: rocblas_stride,
+        AP: *mut f64,
+        stride_A: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+syr performs the matrix-vector operations:
+
+A := A + alpha*x*x**T
+where alpha is a scalar, x is a vector, and A is an
+n by n symmetric matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+*/
+    pub fn rocblas_ssyr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        A: *mut f32,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        A: *mut f64,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        A: *mut f32,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        A: *mut f64,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+syr_batched performs a batch of matrix-vector operations:
+
+A[i] := A[i] + alpha*x[i]*x[i]**T
+where alpha is a scalar, x is an array of vectors, and A is an array of
+n by n symmetric matrices, for i = 1 , ... , batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in, out]
+A         device array of device pointers storing each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        A: *const *mut f32,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        A: *const *mut f64,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        A: *const *mut f32,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        A: *const *mut f64,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        A: *const *mut rocblas_float_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        A: *const *mut rocblas_double_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+syr_strided_batched performs the matrix-vector operations:
+
+A[i] := A[i] + alpha*x[i]*x[i]**T
+where alpha is a scalar, x is an array of vectors, and A is an array of
+n by n symmetric matrices, for i = 1 , ... , batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stridex   [rocblas_stride]
+specifies the pointer increment between vectors (x_i) and (x_i+1).
+@param[in, out]
+A         device pointer to the first matrix A_1.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+strideA   [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        A: *mut f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        A: *mut f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        A: *mut f32,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        A: *mut f64,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+syr2 performs the matrix-vector operations:
+
+A := A + alpha*x*y**T + alpha*y*x**T
+where alpha is a scalar, x and y are vectors, and A is an
+n by n symmetric matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+y         device pointer storing vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in, out]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+*/
+    pub fn rocblas_ssyr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        y: *const f32,
+        incy: rocblas_int,
+        A: *mut f32,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        y: *const f64,
+        incy: rocblas_int,
+        A: *mut f64,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        y: *const f32,
+        incy: i64,
+        A: *mut f32,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        y: *const f64,
+        incy: i64,
+        A: *mut f64,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+syr2_batched performs a batch of matrix-vector operations:
+
+A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
+where alpha is a scalar, x[i] and y[i] are vectors, and A[i] is an
+n by n symmetric matrix, for i = 1 , ... , batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of matrix A.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in, out]
+A         device array of device pointers storing each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: rocblas_int,
+        y: *const *const f32,
+        incy: rocblas_int,
+        A: *const *mut f32,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: rocblas_int,
+        y: *const *const f64,
+        incy: rocblas_int,
+        A: *const *mut f64,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_float_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_float_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_ssyr2_batched; same parameter list.
+    pub fn rocblas_zsyr2_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        y: *const *const rocblas_double_complex,
+        incy: rocblas_int,
+        A: *const *mut rocblas_double_complex,
+        lda: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f32 syr2_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_ssyr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const *const f32,
+        incx: i64,
+        y: *const *const f32,
+        incy: i64,
+        A: *const *mut f32,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f64 syr2_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_dsyr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const *const f64,
+        incx: i64,
+        y: *const *const f64,
+        incy: i64,
+        A: *const *mut f64,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex syr2_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_csyr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        y: *const *const rocblas_float_complex,
+        incy: i64,
+        A: *const *mut rocblas_float_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex syr2_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_zsyr2_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        y: *const *const rocblas_double_complex,
+        incy: i64,
+        A: *const *mut rocblas_double_complex,
+        lda: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 2 API </b>
+
+\details
+syr2_strided_batched performs the matrix-vector operations:
+
+A[i] := A[i] + alpha*x[i]*y[i]**T + alpha*y[i]*x[i]**T
+where alpha is a scalar, x[i] and y[i] are vectors, and each A[i] is an
+n by n symmetric matrix, for i = 1, ..., batch_count
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper ('rocblas_fill_upper') or lower ('rocblas_fill_lower') part of each A_i is referenced:
+- if rocblas_fill_upper, the lower part of A is not referenced
+- if rocblas_fill_lower, the upper part of A is not referenced
+@param[in]
+n         [rocblas_int]
+the number of rows and columns of each matrix A.
+@param[in]
+alpha
+device pointer or host pointer to scalar alpha.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stridex   [rocblas_stride]
+specifies the pointer increment between vectors (x_i) and (x_i+1).
+@param[in]
+y         device pointer to the first vector y_1.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stridey   [rocblas_stride]
+specifies the pointer increment between vectors (y_i) and (y_i+1).
+@param[in, out]
+A         device pointer to the first matrix A_1.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+strideA   [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f32,
+        x: *const f32,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut f32,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f64 counterpart of rocblas_ssyr2_strided_batched; same parameter list.
+    pub fn rocblas_dsyr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const f64,
+        x: *const f64,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut f64,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex counterpart of rocblas_ssyr2_strided_batched; same parameter list.
+    pub fn rocblas_csyr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_ssyr2_strided_batched; same parameter list.
+    pub fn rocblas_zsyr2_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: rocblas_int,
+        strideA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f32 syr2_strided_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_ssyr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f32,
+        x: *const f32,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f32,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut f32,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f64 syr2_strided_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_dsyr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const f64,
+        x: *const f64,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const f64,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut f64,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex syr2_strided_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_csyr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_float_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut rocblas_float_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex syr2_strided_batched taking i64 for n, incx, incy, lda and batch_count.
+    pub fn rocblas_zsyr2_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *const rocblas_double_complex,
+        incy: i64,
+        stridey: rocblas_stride,
+        A: *mut rocblas_double_complex,
+        lda: i64,
+        strideA: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+hemm performs one of the matrix-matrix operations:
+
+C := alpha*A*B + beta*C if side == rocblas_side_left,
+C := alpha*B*A + beta*C if side == rocblas_side_right,
+
+where alpha and beta are scalars, B and C are m by n matrices, and
+A is a Hermitian matrix stored as either upper or lower.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side  [rocblas_side]
+- rocblas_side_left:      C := alpha*A*B + beta*C
+- rocblas_side_right:     C := alpha*B*A + beta*C
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  the upper triangular part of A is accessed
+- rocblas_fill_lower:  the lower triangular part of A is accessed
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B and C. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B and C. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A and B are not referenced.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+- A is m by m if side == rocblas_side_left
+- A is n by n if side == rocblas_side_right
+Only the upper/lower triangular part is accessed.
+The imaginary component of the diagonal elements is not used.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+otherwise lda >= max( 1, n ).
+
+@param[in]
+B       pointer storing matrix B on the GPU.
+Matrix dimension is m by n
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B. ldb >= max( 1, m ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+Matrix dimension is m by n
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, m ).
+*/
+    pub fn rocblas_chemm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_chemm; same parameter list.
+    pub fn rocblas_zhemm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex hemm taking i64 for m, n, lda, ldb and ldc.
+    pub fn rocblas_chemm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex hemm taking i64 for m, n, lda, ldb and ldc.
+    pub fn rocblas_zhemm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+hemm_batched performs a batch of the matrix-matrix operations:
+
+C_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,
+C_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,
+
+where alpha and beta are scalars, B_i and C_i are m by n matrices, and
+A_i is a Hermitian matrix stored as either upper or lower.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side  [rocblas_side]
+- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i
+- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  the upper triangular part of A_i is accessed
+- rocblas_fill_lower:  the lower triangular part of A_i is accessed
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B_i and C_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B_i and C_i. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A_i and B_i are not referenced.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i on the GPU.
+- A_i is m by m if side == rocblas_side_left
+- A_i is n by n if side == rocblas_side_right
+Only the upper/lower triangular part is accessed.
+The imaginary component of the diagonal elements is not used.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+otherwise lda >= max( 1, n ).
+
+@param[in]
+B       device array of device pointers storing each matrix B_i on the GPU.
+Matrix dimension is m by n
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i. ldb >= max( 1, m ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C_i need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+Matrix dimension is m by n
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C_i. ldc >= max( 1, m ).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chemm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_chemm_batched; same parameter list.
+    pub fn rocblas_zhemm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex hemm_batched taking i64 for m, n, lda, ldb, ldc and batch_count.
+    pub fn rocblas_chemm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex hemm_batched taking i64 for m, n, lda, ldb, ldc and batch_count.
+    pub fn rocblas_zhemm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+hemm_strided_batched performs a batch of the matrix-matrix operations:
+
+C_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,
+C_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,
+
+where alpha and beta are scalars, B_i and C_i are m by n matrices, and
+A_i is a Hermitian matrix stored as either upper or lower.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side  [rocblas_side]
+- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i
+- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  the upper triangular part of A_i is accessed
+- rocblas_fill_lower:  the lower triangular part of A_i is accessed
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B_i and C_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B_i and C_i. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A_i and B_i are not referenced.
+
+@param[in]
+A       device pointer to first matrix A_1
+- A_i is m by m if side == rocblas_side_left
+- A_i is n by n if side == rocblas_side_right
+Only the upper/lower triangular part is accessed.
+The imaginary component of the diagonal elements is not used.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+otherwise lda >= max( 1, n ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+
+@param[in]
+B       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+B_i is m by n regardless of side, so
+ldb >= max( 1, m ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) and the next one (B_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, m ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) and the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_chemm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_chemm_strided_batched; same parameter list.
+    pub fn rocblas_zhemm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex hemm_strided_batched taking i64 for m, n, lda, ldb, ldc and batch_count.
+    pub fn rocblas_chemm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex hemm_strided_batched taking i64 for m, n, lda, ldb, ldc and batch_count.
+    pub fn rocblas_zhemm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+herk performs one of the matrix-matrix operations for a Hermitian rank-k update:
+
+C := alpha*op( A )*op( A )^H + beta*C,
+
+where  alpha and beta are scalars, op(A) is an n by k matrix, and
+C is a n x n Hermitian matrix stored as either upper or lower.
+
+op( A ) = A, and A is n by k if transA == rocblas_operation_none
+op( A ) = A^H and A is k by n if transA == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  the upper triangular part of C is accessed
+- rocblas_fill_lower:  the lower triangular part of C is accessed
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+- rocblas_operation_none:                 op(A) = A
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+Matrix dimension is ( lda, k ) if transA = rocblas_operation_none, otherwise (lda, n)
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if transA = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+*/
+    pub fn rocblas_cherk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_cherk; alpha and beta are f64 scalars.
+    pub fn rocblas_zherk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex herk taking i64 for n, k, lda and ldc.
+    pub fn rocblas_cherk_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex herk taking i64 for n, k, lda and ldc.
+    pub fn rocblas_zherk_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+herk_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:
+
+C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i,
+
+where  alpha and beta are scalars, op(A) is an n by k matrix, and
+C_i is a n x n Hermitian matrix stored as either upper or lower.
+
+op( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none
+op( A_i ) = A_i^H and A_i is k by n if transA == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  the upper triangular part of C_i is accessed
+- rocblas_fill_lower:  the lower triangular part of C_i is accessed
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_conjugate_transpose: op(A) = A^H
+- rocblas_operation_none:                op(A) = A
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i of dimension (lda, k)
+when transA is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if transA = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_cherk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const f32,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_cherk_batched; alpha and beta are f64 scalars.
+    pub fn rocblas_zherk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const f64,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex herk_batched taking i64 for n, k, lda, ldc and batch_count.
+    pub fn rocblas_cherk_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        beta: *const f32,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex herk_batched taking i64 for n, k, lda, ldc and batch_count.
+    pub fn rocblas_zherk_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        beta: *const f64,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+herk_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:
+
+C_i := alpha*op( A_i )*op( A_i )^H + beta*C_i,
+
+where  alpha and beta are scalars, op(A) is an n by k matrix, and
+C_i is a n x n Hermitian matrix stored as either upper or lower.
+
+op( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none
+op( A_i ) = A_i^H and A_i is k by n if transA == rocblas_operation_conjugate_transpose
+
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  the upper triangular part of C_i is accessed
+- rocblas_fill_lower:  the lower triangular part of C_i is accessed
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_conjugate_transpose: op(A) = A^H
+- rocblas_operation_none:                op(A) = A
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
+when transA is rocblas_operation_none, otherwise of dimension (lda, n)
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if transA = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       Device pointer to the first matrix C_1 on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) and the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_cherk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cherk_strided_batched, with f64 scalars and rocblas_double_complex matrices.
+    #[must_use]
+    pub fn rocblas_zherk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cherk_strided_batched, with i64 for n, k, lda, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_cherk_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_zherk_strided_batched, with i64 for n, k, lda, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_zherk_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+her2k performs one of the matrix-matrix operations for a Hermitian rank-2k update:
+
+C := alpha*op( A )*op( B )^H + conj(alpha)*op( B )*op( A )^H + beta*C,
+
+where  alpha and beta are scalars, op(A) and op(B) are n by k matrices, and
+C is a n x n Hermitian matrix stored as either upper or lower.
+
+op( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none
+op( A ) = A^H, op( B ) = B^H,  and A and B are k by n if trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_conjugate_transpose:  op( A ) = A^H, op( B ) = B^H
+- rocblas_operation_none:                 op( A ) = A, op( B ) = B
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+Matrix dimension is ( lda, k ) if trans = rocblas_operation_none, otherwise (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+B       pointer storing matrix B on the GPU.
+Matrix dimension is ( ldb, k ) if trans = rocblas_operation_none, otherwise (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+*/
+    pub fn rocblas_cher2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cher2k, with rocblas_double_complex alpha/A/B/C and an f64 beta.
+    #[must_use]
+    pub fn rocblas_zher2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cher2k, with i64 for n, k, lda, ldb and ldc.
+    #[must_use]
+    pub fn rocblas_cher2k_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_zher2k, with i64 for n, k, lda, ldb and ldc.
+    #[must_use]
+    pub fn rocblas_zher2k_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+her2k_batched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update:
+
+C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a n x n Hermitian matrix stored as either upper or lower.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+@param[in]
+B       device array of device pointers storing each matrix B_i of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_cher2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cher2k_batched, with rocblas_double_complex alpha/A/B/C and an f64 beta.
+    #[must_use]
+    pub fn rocblas_zher2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cher2k_batched, with i64 for n, k, lda, ldb, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_cher2k_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_zher2k_batched, with i64 for n, k, lda, ldb, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_zher2k_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+her2k_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-2k update:
+
+C_i := alpha*op( A_i )*op( B_i )^H + conj(alpha)*op( B_i )*op( A_i )^H + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a n x n Hermitian matrix stored as either upper or lower.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+
+@param[in]
+B       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) and the next one (B_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       Device pointer to the first matrix C_1 on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) and the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_cher2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cher2k_strided_batched, with rocblas_double_complex alpha/A/B/C and an f64 beta.
+    #[must_use]
+    pub fn rocblas_zher2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cher2k_strided_batched, with i64 for n, k, lda, ldb, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_cher2k_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_zher2k_strided_batched, with i64 for n, k, lda, ldb, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_zher2k_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+herkx performs one of the matrix-matrix operations for a Hermitian rank-k update:
+
+C := alpha*op( A )*op( B )^H + beta*C,
+
+where  alpha and beta are scalars, op(A) and op(B) are n by k matrices, and
+C is a n x n Hermitian matrix stored as either upper or lower.
+
+This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^H will be Hermitian.
+
+op( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none
+op( A ) = A^H, op( B ) = B^H,  and A and B are k by n if trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_conjugate_transpose:  op( A ) = A^H, op( B ) = B^H
+- rocblas_operation_none:                 op( A ) = A, op( B ) = B
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+Matrix dimension is ( lda, k ) if trans = rocblas_operation_none, otherwise (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+@param[in]
+B       pointer storing matrix B on the GPU.
+Matrix dimension is ( ldb, k ) if trans = rocblas_operation_none, otherwise (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+*/
+    pub fn rocblas_cherkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cherkx, with rocblas_double_complex alpha/A/B/C and an f64 beta.
+    #[must_use]
+    pub fn rocblas_zherkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cherkx, with i64 for n, k, lda, ldb and ldc.
+    #[must_use]
+    pub fn rocblas_cherkx_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_zherkx, with i64 for n, k, lda, ldb and ldc.
+    #[must_use]
+    pub fn rocblas_zherkx_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+herkx_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:
+
+C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a n x n Hermitian matrix stored as either upper or lower.
+
+This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^H will be Hermitian.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+B       device array of device pointers storing each matrix B_i of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_cherkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cherkx_batched, with rocblas_double_complex alpha/A/B/C and an f64 beta.
+    #[must_use]
+    pub fn rocblas_zherkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_cherkx_batched, with i64 for n, k, lda, ldb, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_cherkx_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // Same parameter list as rocblas_zherkx_batched, with i64 for n, k, lda, ldb, ldc and batch_count.
+    #[must_use]
+    pub fn rocblas_zherkx_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+herkx_strided_batched performs a batch of the matrix-matrix operations for a Hermitian rank-k update:
+
+C_i := alpha*op( A_i )*op( B_i )^H + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a n x n Hermitian matrix stored as either upper or lower.
+
+This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^H will be Hermitian.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^H, op( B_i ) = B_i^H,  and A_i and B_i are k by n if trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^H, op( B_i ) = B_i^H
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+
+@param[in]
+B       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) and the next one (B_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       Device pointer to the first matrix C_1 on the GPU.
+The imaginary components of the diagonal elements are not used but are set to zero unless quick return.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) and the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_cherkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zherkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cherkx_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zherkx_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+symm performs one of the matrix-matrix operations:
+
+C := alpha*A*B + beta*C if side == rocblas_side_left,
+C := alpha*B*A + beta*C if side == rocblas_side_right,
+
+where alpha and beta are scalars, B and C are m by n matrices, and
+A is a symmetric matrix stored as either upper or lower.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side  [rocblas_side]
+- rocblas_side_left:      C := alpha*A*B + beta*C
+- rocblas_side_right:     C := alpha*B*A + beta*C
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix
+- rocblas_fill_lower:  A is a  lower triangular matrix
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B and C. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B and C. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A and B are not referenced.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+- A is m by m if side == rocblas_side_left
+- A is n by n if side == rocblas_side_right
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+otherwise lda >= max( 1, n ).
+
+@param[in]
+B       pointer storing matrix B on the GPU.
+Matrix dimension is m by n.
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B. ldb >= max( 1, m ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+Matrix dimension is m by n.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, m ).
+*/
+    pub fn rocblas_ssymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssymm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+symm_batched performs a batch of the matrix-matrix operations:
+
+C_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,
+C_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,
+
+where alpha and beta are scalars, B_i and C_i are m by n matrices, and
+A_i is a symmetric matrix stored as either upper or lower.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side  [rocblas_side]
+- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i
+- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix
+- rocblas_fill_lower:  A_i is a  lower triangular matrix
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B_i and C_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B_i and C_i. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A_i and B_i are not referenced.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i on the GPU.
+- A_i is m by m if side == rocblas_side_left
+- A_i is n by n if side == rocblas_side_right
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+otherwise lda >= max( 1, n ).
+
+@param[in]
+B       device array of device pointers storing each matrix B_i on the GPU.
+Matrix dimension is m by n.
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i. ldb >= max( 1, m ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C_i need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+Matrix dimension is m by n.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C_i. ldc >= max( 1, m ).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssymm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+symm_strided_batched performs a batch of the matrix-matrix operations:
+
+C_i := alpha*A_i*B_i + beta*C_i if side == rocblas_side_left,
+C_i := alpha*B_i*A_i + beta*C_i if side == rocblas_side_right,
+
+where alpha and beta are scalars, B_i and C_i are m by n matrices, and
+A_i is a symmetric matrix stored as either upper or lower.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side  [rocblas_side]
+- rocblas_side_left:      C_i := alpha*A_i*B_i + beta*C_i
+- rocblas_side_right:     C_i := alpha*B_i*A_i + beta*C_i
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A_i is an upper triangular matrix
+- rocblas_fill_lower:  A_i is a  lower triangular matrix
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B_i and C_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B_i and C_i. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A_i and B_i are not referenced.
+
+@param[in]
+A       device pointer to first matrix A_1
+- A_i is m by m if side == rocblas_side_left
+- A_i is n by n if side == rocblas_side_right
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+otherwise lda >= max( 1, n ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the next one (A_i+1).
+
+@param[in]
+B       device pointer to first matrix B_1 of dimension (ldb, n) on the GPU.
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i. ldb >= max( 1, m ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) to the next one (B_i+1).
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C        device pointer to first matrix C_1 of dimension (ldc, n) on the GPU.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, m ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) to the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssymm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsymm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csymm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsymm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syrk performs one of the matrix-matrix operations for a symmetric rank-k update:
+
+C := alpha*op( A )*op( A )^T + beta*C,
+
+where  alpha and beta are scalars, op(A) is an n by k matrix, and
+C is a symmetric n x n matrix stored as either upper or lower.
+
+op( A ) = A, and A is n by k if transA == rocblas_operation_none
+op( A ) = A^T and A is k by n if transA == rocblas_operation_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_transpose:           op(A) = A^T
+- rocblas_operation_none:                op(A) = A
+- rocblas_operation_conjugate_transpose: op(A) = A^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types. See cherk
+and zherk.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+Matrix dimension is ( lda, k ) when transA = rocblas_operation_none, otherwise (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if transA = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+Only the upper/lower triangular part is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+*/
+    pub fn rocblas_ssyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrk(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyrk_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrk_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrk_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrk_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syrk_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:
+
+C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) is an n by k matrix, and
+C_i is a symmetric n x n matrix stored as either upper or lower.
+
+op( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none
+op( A_i ) = A_i^T and A_i is k by n if transA == rocblas_operation_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_transpose:           op(A) = A^T
+- rocblas_operation_none:                op(A) = A
+- rocblas_operation_conjugate_transpose: op(A) = A^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types. See cherk
+and zherk.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i of dimension (lda, k)
+when transA is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if transA = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+Only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrk_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyrk_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrk_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrk_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrk_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syrk_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:
+
+C_i := alpha*op( A_i )*op( A_i )^T + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) is an n by k matrix, and
+C_i is a symmetric n x n matrix stored as either upper or lower.
+
+op( A_i ) = A_i, and A_i is n by k if transA == rocblas_operation_none
+op( A_i ) = A_i^T and A_i is k by n if transA == rocblas_operation_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_transpose:           op(A) = A^T
+- rocblas_operation_none:                op(A) = A
+- rocblas_operation_conjugate_transpose: op(A) = A^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types. See cherk
+and zherk.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
+when transA is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if transA = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       Device pointer to the first matrix C_1 on the GPU.
+only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) to the start of the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyrk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrk_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyrk_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrk_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrk_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrk_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syr2k performs one of the matrix-matrix operations for a symmetric rank-2k update:
+
+C := alpha*(op( A )*op( B )^T + op( B )*op( A )^T) + beta*C,
+
+where  alpha and beta are scalars, op(A) and op(B) are n by k matrices, and
+C is a symmetric n x n matrix stored as either upper or lower.
+
+op( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none
+op( A ) = A^T, op( B ) = B^T, and A and B are k by n if trans == rocblas_operation_transpose
+or for ssyr2k and dsyr2k when trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_transpose:           op( A ) = A^T, op( B ) = B^T
+- rocblas_operation_none:                op( A ) = A, op( B ) = B
+- rocblas_operation_conjugate_transpose: op( A ) = A^T, op( B ) = B^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types in csyr2k and zsyr2k.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A) and op(B). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+Matrix dimension is ( lda, k ) when trans = rocblas_operation_none, otherwise (lda, n)
+only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+B       pointer storing matrix B on the GPU.
+Matrix dimension is ( ldb, k ) when trans = rocblas_operation_none, otherwise (ldb, n)
+only the upper/lower triangular part is accessed.
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B.
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in]
+C       pointer storing matrix C on the GPU.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+*/
+    pub fn rocblas_ssyr2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2k(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr2k_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2k_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2k_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2k_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syr2k_batched performs a batch of the matrix-matrix operations for a symmetric rank-2k update:
+
+C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a symmetric n x n matrix stored as either upper or lower.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == rocblas_operation_transpose
+or for ssyr2k_batched and dsyr2k_batched when trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types in csyr2k_batched and zsyr2k_batched.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A) and op(B). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+@param[in]
+B       device array of device pointers storing each matrix B_i of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n).
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in]
+C       device array of device pointers storing each matrix C_i on the GPU.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyr2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2k_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr2k_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2k_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2k_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2k_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syr2k_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-2k update:
+
+C_i := alpha*(op( A_i )*op( B_i )^T + op( B_i )*op( A_i )^T) + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a symmetric n x n matrix stored as either upper or lower.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^T, op( B_i ) = B_i^T, and A_i and B_i are k by n if trans == rocblas_operation_transpose
+or for ssyr2k_strided_batched and dsyr2k_strided_batched when trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types in csyr2k_strided_batched and zsyr2k_strided_batched.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A) and op(B). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n).
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
+
+@param[in]
+B       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n)
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) to the start of the next one (B_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       Device pointer to the first matrix C_1 on the GPU.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) to the start of the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyr2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2k_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyr2k_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyr2k_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyr2k_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyr2k_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syrkx performs one of the matrix-matrix operations for a symmetric rank-k update:
+
+C := alpha*op( A )*op( B )^T + beta*C,
+
+where  alpha and beta are scalars, op(A) and op(B) are n by k matrices, and
+C is a symmetric n x n matrix stored as either upper or lower.
+
+This routine should only be used when the caller can guarantee that the result of op( A )*op( B )^T will be symmetric.
+
+op( A ) = A, op( B ) = B, and A and B are n by k if trans == rocblas_operation_none
+op( A ) = A^T, op( B ) = B^T,  and A and B are k by n if trans == rocblas_operation_transpose
+or for ssyrkx and dsyrkx when trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_transpose:           op( A ) = A^T, op( B ) = B^T
+- rocblas_operation_none:                op( A ) = A, op( B ) = B
+- rocblas_operation_conjugate_transpose: op( A ) = A^T, op( B ) = B^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types in csyrkx and zsyrkx.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A) and op(B). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       pointer storing matrix A on the GPU.
+Matrix dimension is ( lda, k ) if trans = rocblas_operation_none, otherwise (lda, n)
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+B       pointer storing matrix B on the GPU.
+Matrix dimension is ( ldb, k ) if trans = rocblas_operation_none, otherwise (ldb, n)
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       pointer storing matrix C on the GPU.
+only the upper/lower triangular part is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+*/
+    pub fn rocblas_ssyrkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrkx(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyrkx_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrkx_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrkx_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrkx_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syrkx_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:
+
+C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a symmetric n x n matrix stored as either upper or lower.
+
+This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^T, op( B_i ) = B_i^T,  and A_i and B_i are k by n if trans == rocblas_operation_transpose
+or for ssyrkx_batched and dsyrkx_batched when trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types in csyrkx_batched and zsyrkx_batched.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A_i) and op(B_i). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       device array of device pointers storing each matrix A_i of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n)
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+B       device array of device pointers storing each matrix B_i of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n)
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       device array of device pointers storing each matrix C_i on the GPU.
+only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyrkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrkx_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyrkx_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrkx_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrkx_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrkx_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+syrkx_strided_batched performs a batch of the matrix-matrix operations for a symmetric rank-k update:
+
+C_i := alpha*op( A_i )*op( B_i )^T + beta*C_i,
+
+where  alpha and beta are scalars, op(A_i) and op(B_i) are n by k matrices, and
+C_i is a symmetric n x n matrix stored as either upper or lower.
+
+This routine should only be used when the caller can guarantee that the result of op( A_i )*op( B_i )^T will be symmetric.
+
+op( A_i ) = A_i, op( B_i ) = B_i, and A_i and B_i are n by k if trans == rocblas_operation_none
+op( A_i ) = A_i^T, op( B_i ) = B_i^T,  and A_i and B_i are k by n if trans == rocblas_operation_transpose
+or for ssyrkx_strided_batched and dsyrkx_strided_batched when trans == rocblas_operation_conjugate_transpose
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C_i is an upper triangular matrix
+- rocblas_fill_lower:  C_i is a  lower triangular matrix
+
+@param[in]
+trans  [rocblas_operation]
+- rocblas_operation_transpose:           op( A_i ) = A_i^T, op( B_i ) = B_i^T
+- rocblas_operation_none:                op( A_i ) = A_i, op( B_i ) = B_i
+- rocblas_operation_conjugate_transpose: op( A_i ) = A_i^T, op( B_i ) = B_i^T
+
+rocblas_operation_conjugate_transpose is not supported for complex types in csyrkx_strided_batched and zsyrkx_strided_batched.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of rows and columns of C_i. n >= 0.
+
+@param[in]
+k       [rocblas_int]
+k specifies the number of columns of op(A_i) and op(B_i). k >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and A need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_1 on the GPU of dimension (lda, k)
+when trans is rocblas_operation_none, otherwise of dimension (lda, n)
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A_i.
+
+if trans = rocblas_operation_none,  lda >= max( 1, n ),
+otherwise lda >= max( 1, k ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_i+1).
+
+@param[in]
+B       Device pointer to the first matrix B_1 on the GPU of dimension (ldb, k)
+when trans is rocblas_operation_none, otherwise of dimension (ldb, n).
+
+@param[in]
+ldb     [rocblas_int]
+ldb specifies the first dimension of B_i.
+
+if trans = rocblas_operation_none,  ldb >= max( 1, n ),
+otherwise ldb >= max( 1, k ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) to the start of the next one (B_i+1).
+
+@param[in]
+beta
+beta specifies the scalar beta. When beta is
+zero then C need not be set before entry.
+
+@param[in, out]
+C       Device pointer to the first matrix C_1 on the GPU.
+only the upper/lower triangular part of each C_i is accessed.
+
+@param[in]
+ldc    [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, n ).
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) to the start of the next one (C_i+1).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_ssyrkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrkx_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ssyrkx_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dsyrkx_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_csyrkx_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zsyrkx_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        trans: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trmm performs one of the matrix-matrix operations:
+
+C := alpha*op( A )*B,   or
+C := alpha*B*op( A ),
+
+The Legacy BLAS in-place trmm functionality,
+
+B := alpha*op( A )*B,   or
+B := alpha*B*op( A ),
+
+is available by setting pointer C equal to pointer B, and ldc equal to ldb.
+
+alpha  is a scalar,  B  is an m by n matrix, C  is an m by n matrix,  A  is a unit, or
+non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+op( A ) = A     or
+op( A ) = A^T   or
+op( A ) = A^H.
+
+When uplo == rocblas_fill_upper the  leading  k by k
+upper triangular part of the array  A must contain the upper
+triangular matrix and the strictly lower triangular part of
+A is not referenced. Here k is m when side == rocblas_side_left
+and is n when side == rocblas_side_right.
+
+When uplo == rocblas_fill_lower the  leading  k by k
+lower triangular part of the array  A must contain the lower
+triangular matrix  and the strictly upper triangular part of
+A is not referenced. Here k is m when  side == rocblas_side_left
+and is n when side == rocblas_side_right.
+
+Note that when  diag == rocblas_diagonal_unit  the diagonal elements of
+A  are not referenced either,  but are assumed to be  unity.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+Specifies whether op(A) multiplies B from the left or right as follows:
+- rocblas_side_left:       C := alpha*op( A )*B
+- rocblas_side_right:      C := alpha*B*op( A )
+
+@param[in]
+uplo    [rocblas_fill]
+Specifies whether the matrix A is an upper or lower triangular matrix as follows:
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+Specifies the form of op(A) to be used in the matrix multiplication as follows:
+- rocblas_operation_none:    op(A) = A
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+Specifies whether or not A is unit triangular as follows:
+- rocblas_diagonal_unit:      A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A is not referenced and B need not be set before
+entry.
+
+@param[in]
+A       Device pointer to matrix A on the GPU.
+A has dimension ( lda, k ), where k is m
+when  side == rocblas_side_left  and
+is  n  when  side == rocblas_side_right.
+
+When uplo == rocblas_fill_upper the  leading  k by k
+upper triangular part of the array  A must contain the upper
+triangular matrix  and the strictly lower triangular part of
+A is not referenced.
+
+When uplo == rocblas_fill_lower the  leading  k by k
+lower triangular part of the array  A must contain the lower
+triangular matrix  and the strictly upper triangular part of
+A is not referenced.
+
+Note that when  diag == rocblas_diagonal_unit  the diagonal elements of
+A  are not referenced either,  but are assumed to be  unity.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side == rocblas_side_left,  lda >= max( 1, m ),
+if side == rocblas_side_right, lda >= max( 1, n ).
+
+@param[in]
+B       Device pointer to the matrix B on the GPU.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of B. ldb >= max( 1, m ).
+
+@param[out]
+C      Device pointer to the matrix C on the GPU.
+
+@param[in]
+ldc   [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, m).
+If B and C are pointers to the same matrix then ldc must equal ldb or
+rocblas_status_invalid_value will be returned.
+*/
+    pub fn rocblas_strmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_strmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *const f32,
+        ldb: i64,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dtrmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *const f64,
+        ldb: i64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ctrmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_ztrmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trmm_batched performs one of the matrix-matrix operations:
+
+C_i := alpha*op( A_i )*B_i,   or
+C_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,
+
+The Legacy BLAS in-place trmm_batched functionality,
+
+B_i := alpha*op( A_i )*B_i,   or
+B_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,
+
+is available by setting pointer C equal to pointer B and ldc equal to ldb.
+
+alpha  is a scalar,  B_i  is an m by n matrix, C_i  is an m by n matrix,  A_i  is a unit, or
+non-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of
+
+op( A_i ) = A_i     or
+op( A_i ) = A_i^T   or
+op( A_i ) = A_i^H.
+
+When uplo == rocblas_fill_upper the  leading  k by k
+upper triangular part of the array  A must contain the upper
+triangular matrix and the strictly lower triangular part of
+A is not referenced. Here k is m when side == rocblas_side_left
+and is n when side == rocblas_side_right.
+
+When uplo == rocblas_fill_lower the  leading  k by k
+lower triangular part of the array  A must contain the lower
+triangular matrix  and the strictly upper triangular part of
+A is not referenced. Here k is m when  side == rocblas_side_left
+and is n when side == rocblas_side_right.
+
+Note that when  diag == rocblas_diagonal_unit  the diagonal elements of
+A  are not referenced either,  but are assumed to be  unity.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+Specifies whether op(A_i) multiplies B_i from the left or right as follows:
+- rocblas_side_left:       C_i := alpha*op( A_i )*B_i
+- rocblas_side_right:      C_i := alpha*B_i*op( A_i )
+
+@param[in]
+uplo    [rocblas_fill]
+Specifies whether the matrix A is an upper or lower triangular matrix as follows:
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+Specifies the form of op(A_i) to be used in the matrix multiplication as follows:
+- rocblas_operation_none:    op(A_i) = A_i
+- rocblas_operation_transpose:      op(A_i) = A_i^T
+- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H
+
+@param[in]
+diag    [rocblas_diagonal]
+Specifies whether or not A_i is unit triangular as follows:
+- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B_i. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A_i is not referenced and B_i need not be set before
+entry.
+
+@param[in]
+A       Device array of device pointers storing each matrix A_i on the GPU.
+Each A_i is of dimension ( lda, k ), where k is m
+when  side == rocblas_side_left  and
+is  n  when  side == rocblas_side_right.
+
+When uplo == rocblas_fill_upper the  leading  k by k
+upper triangular part of the array  A must contain the upper
+triangular matrix  and the strictly lower triangular part of
+A is not referenced.
+
+When uplo == rocblas_fill_lower the  leading  k by k
+lower triangular part of the array  A must contain the lower
+triangular matrix  and the strictly upper triangular part of
+A is not referenced.
+
+Note that when  diag == rocblas_diagonal_unit  the diagonal elements of
+A_i  are not referenced either,  but are assumed to be  unity.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side == rocblas_side_left,  lda >= max( 1, m ),
+if side == rocblas_side_right, lda >= max( 1, n ).
+
+@param[in]
+B       device array of device pointers storing each matrix B_i on the GPU.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of B_i. ldb >= max( 1, m ).
+
+@param[out]
+C      device array of device pointers storing each matrix C_i on the GPU.
+
+@param[in]
+ldc   [rocblas_int]
+ldc specifies the first dimension of C. ldc >= max( 1, m).
+If B and C are pointers to the same array of pointers then ldc must
+equal ldb or rocblas_status_invalid_value will be returned.
+
+@param[in]
+batch_count [rocblas_int]
+number of instances i in the batch.*/
+    pub fn rocblas_strmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strmm_batched, with f64 elements
+    #[must_use]
+    pub fn rocblas_dtrmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strmm_batched, with rocblas_float_complex elements
+    #[must_use]
+    pub fn rocblas_ctrmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strmm_batched, with rocblas_double_complex elements
+    #[must_use]
+    pub fn rocblas_ztrmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_strmm_batched signature with i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_strmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *const f32,
+        ldb: i64,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // f64 elements; i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_dtrmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *const f64,
+        ldb: i64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_float_complex elements; i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_ctrmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_double_complex elements; i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_ztrmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trmm_strided_batched performs one of the matrix-matrix operations:
+
+C_i := alpha*op( A_i )*B_i,   or
+C_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,
+
+The Legacy BLAS in-place trmm_strided_batched functionality,
+
+B_i := alpha*op( A_i )*B_i,   or
+B_i := alpha*B_i*op( A_i )  for i = 0, 1, ... batch_count -1,
+
+is available by setting pointer C equal to pointer B, ldc equal to ldb, and stride_C equal to stride_B.
+
+alpha  is a scalar,  B_i  is an m by n matrix, C_i  is an m by n matrix,  A_i  is a unit, or
+non-unit,  upper or lower triangular matrix  and  op( A_i )  is one  of
+
+op( A_i ) = A_i   or
+op( A_i ) = A_i^T   or
+op( A_i ) = A_i^H.
+
+When uplo == rocblas_fill_upper the  leading  k by k
+upper triangular part of the array  A must contain the upper
+triangular matrix and the strictly lower triangular part of
+A is not referenced. Here k is m when side == rocblas_side_left
+and is n when side == rocblas_side_right.
+
+When uplo == rocblas_fill_lower the  leading  k by k
+lower triangular part of the array  A must contain the lower
+triangular matrix  and the strictly upper triangular part of
+A is not referenced. Here k is m when  side == rocblas_side_left
+and is n when side == rocblas_side_right.
+
+Note that when  diag == rocblas_diagonal_unit  the diagonal elements of
+A  are not referenced either,  but are assumed to be  unity.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+Specifies whether op(A_i) multiplies B_i from the left or right as follows:
+- rocblas_side_left:       C_i := alpha*op( A_i )*B_i
+- rocblas_side_right:      C_i := alpha*B_i*op( A_i )
+
+@param[in]
+uplo    [rocblas_fill]
+Specifies whether the matrix A is an upper or lower triangular matrix as follows:
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+Specifies the form of op(A_i) to be used in the matrix multiplication as follows:
+- rocblas_operation_none:    op(A_i) = A_i
+- rocblas_operation_transpose:      op(A_i) = A_i^T
+- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H
+
+@param[in]
+diag    [rocblas_diagonal]
+Specifies whether or not A_i is unit triangular as follows:
+- rocblas_diagonal_unit:      A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A_i is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B_i. n >= 0.
+
+@param[in]
+alpha
+alpha specifies the scalar alpha. When alpha is
+zero then A_i is not referenced and B_i need not be set before
+entry.
+
+@param[in]
+A       Device pointer to the first matrix A_0 on the GPU.
+Each A_i is of dimension ( lda, k ), where k is m
+when  side == rocblas_side_left  and
+is  n  when  side == rocblas_side_right.
+
+When uplo == rocblas_fill_upper the  leading  k by k
+upper triangular part of the array  A must contain the upper
+triangular matrix  and the strictly lower triangular part of
+A is not referenced.
+
+When uplo == rocblas_fill_lower the  leading  k by k
+lower triangular part of the array  A must contain the lower
+triangular matrix  and the strictly upper triangular part of
+A is not referenced.
+
+Note that when  diag == rocblas_diagonal_unit  the diagonal elements of
+A_i  are not referenced either,  but are assumed to be  unity.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side == rocblas_side_left,  lda >= max( 1, m ),
+if side == rocblas_side_right, lda >= max( 1, n ).
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+
+@param[in]
+B       Device pointer to the first matrix B_0 on the GPU.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of B_i. ldb >= max( 1, m ).
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) and the next one (B_i+1).
+
+@param[out]
+C      Device pointer to the first matrix C_0 on the GPU.
+
+@param[in]
+ldc   [rocblas_int]
+ldc specifies the first dimension of C_i. ldc >= max( 1, m).
+If B and C pointers are to the same matrix then ldc must equal ldb or
+rocblas_status_invalid_size will be returned.
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) and the next one (C_i+1).
+If B == C and ldb == ldc then stride_C should equal stride_B or
+behavior is undefined.
+
+@param[in]
+batch_count [rocblas_int]
+number of instances i in the batch.*/
+    pub fn rocblas_strmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strmm_strided_batched, with f64 elements
+    #[must_use]
+    pub fn rocblas_dtrmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strmm_strided_batched, with rocblas_float_complex elements
+    #[must_use]
+    pub fn rocblas_ctrmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strmm_strided_batched, with rocblas_double_complex elements
+    #[must_use]
+    pub fn rocblas_ztrmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_strmm_strided_batched signature with i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_strmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f32,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // f64 elements; i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_dtrmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const f64,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_float_complex elements; i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_ctrmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // rocblas_double_complex elements; i64 sizes, leading dimensions and batch_count
+    #[must_use]
+    pub fn rocblas_ztrmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trtri computes the inverse of a matrix A, namely, invA
+and writes the result into invA;
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+
+if rocblas_fill_upper, the lower part of A is not referenced
+if rocblas_fill_lower, the upper part of A is not referenced
+@param[in]
+diag      [rocblas_diagonal]
+- 'rocblas_diagonal_non_unit', A is non-unit triangular;
+- 'rocblas_diagonal_unit', A is unit triangular;
+@param[in]
+n         [rocblas_int]
+size of matrix A and invA.
+@param[in]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[out]
+invA      device pointer storing matrix invA.
+Partial inplace operation is supported. See below:
+- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store
+the inverse of the upper triangular matrix, and the strictly lower
+triangular part of invA may be cleared.
+- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store
+the inverse of the lower triangular matrix, and the strictly upper
+triangular part of invA may be cleared.
+@param[in]
+ldinvA    [rocblas_int]
+specifies the leading dimension of invA.*/
+    pub fn rocblas_strtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        invA: *mut f32,
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri, with f64 elements
+    #[must_use]
+    pub fn rocblas_dtrtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        invA: *mut f64,
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri, with rocblas_float_complex elements
+    #[must_use]
+    pub fn rocblas_ctrtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        invA: *mut rocblas_float_complex,
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri, with rocblas_double_complex elements
+    #[must_use]
+    pub fn rocblas_ztrtri(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        invA: *mut rocblas_double_complex,
+        ldinvA: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trtri_batched computes the inverse of A_i and writes it into invA_i where
+A_i and invA_i are the i-th matrices in the batch,
+for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+@param[in]
+diag      [rocblas_diagonal]
+- 'rocblas_diagonal_non_unit', A is non-unit triangular;
+- 'rocblas_diagonal_unit', A is unit triangular;
+@param[in]
+n         [rocblas_int]
+@param[in]
+A         device array of device pointers storing each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[out]
+invA      device array of device pointers storing the inverse of each matrix A_i.
+Partial inplace operation is supported. See below:
+- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store
+the inverse of the upper triangular matrix, and the strictly lower
+triangular part of invA may be cleared.
+- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store
+the inverse of the lower triangular matrix, and the strictly upper
+triangular part of invA may be cleared.
+@param[in]
+ldinvA    [rocblas_int]
+specifies the leading dimension of each invA_i.
+@param[in]
+batch_count [rocblas_int]
+numbers of matrices in the batch.*/
+    pub fn rocblas_strtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        invA: *const *mut f32,
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri_batched, with f64 elements
+    #[must_use]
+    pub fn rocblas_dtrtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        invA: *const *mut f64,
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri_batched, with rocblas_float_complex elements
+    #[must_use]
+    pub fn rocblas_ctrtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        invA: *const *mut rocblas_float_complex,
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri_batched, with rocblas_double_complex elements
+    #[must_use]
+    pub fn rocblas_ztrtri_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        invA: *const *mut rocblas_double_complex,
+        ldinvA: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trtri_strided_batched computes the inverse of A_i and writes it into invA_i where
+A_i and invA_i are the i-th matrices in the batch,
+for i = 1, ..., batch_count.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo      [rocblas_fill]
+specifies whether the upper 'rocblas_fill_upper' or lower 'rocblas_fill_lower'
+@param[in]
+diag      [rocblas_diagonal]
+- 'rocblas_diagonal_non_unit', A is non-unit triangular;
+- 'rocblas_diagonal_unit', A is unit triangular;
+@param[in]
+n         [rocblas_int]
+@param[in]
+A         device pointer pointing to address of first matrix A_1.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A.
+@param[in]
+stride_a  [rocblas_stride]
+"batch stride a": stride from the start of one A_i matrix to the next A_(i + 1).
+@param[out]
+invA      device pointer storing the inverses of each matrix A_i.
+Partial inplace operation is supported. See below:
+
+- If UPLO = 'U', the leading N-by-N upper triangular part of the invA will store
+the inverse of the upper triangular matrix, and the strictly lower
+triangular part of invA may be cleared.
+
+- If UPLO = 'L', the leading N-by-N lower triangular part of the invA will store
+the inverse of the lower triangular matrix, and the strictly upper
+triangular part of invA may be cleared.
+@param[in]
+ldinvA    [rocblas_int]
+specifies the leading dimension of each invA_i.
+@param[in]
+stride_invA  [rocblas_stride]
+"batch stride invA": stride from the start of one invA_i matrix to the next invA_(i + 1).
+@param[in]
+batch_count  [rocblas_int]
+numbers of matrices in the batch.*/
+    pub fn rocblas_strtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        invA: *mut f32,
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri_strided_batched, with f64 elements
+    #[must_use]
+    pub fn rocblas_dtrtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        invA: *mut f64,
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri_strided_batched, with rocblas_float_complex elements
+    #[must_use]
+    pub fn rocblas_ctrtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        invA: *mut rocblas_float_complex,
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // same parameter list as rocblas_strtri_strided_batched, with rocblas_double_complex elements
+    #[must_use]
+    pub fn rocblas_ztrtri_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        diag: rocblas_diagonal,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        invA: *mut rocblas_double_complex,
+        ldinvA: rocblas_int,
+        stride_invA: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trsm solves:
+
+op(A)*X = alpha*B or  X*op(A) = alpha*B,
+
+where alpha is a scalar, X and B are m by n matrices,
+
+A is triangular matrix and op(A) is one of
+
+op( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.
+
+The matrix X is overwritten on B.
+
+Note about memory allocation:
+When trsm is launched with a k evenly divisible by the internal block size of 128,
+and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated
+memory found in the handle to increase overall performance. This memory can be managed by using
+the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory
+used for temporary storage will default to 1 MB and may result in chunking, which in turn may
+reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set
+to the desired chunk of right hand sides to be used at a time
+(where k is m when rocblas_side_left and is n when rocblas_side_right).
+
+Although not widespread, some gemm kernels used by trsm may use atomic operations.
+See Atomic Operations in the API Reference Guide for more information.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+- rocblas_side_left:       op(A)*X = alpha*B
+- rocblas_side_right:      X*op(A) = alpha*B
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a  lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B. n >= 0.
+
+@param[in]
+alpha
+device pointer or host pointer specifying the scalar alpha. When alpha is
+&zero then A is not referenced and B need not be set before
+entry.
+
+@param[in]
+A       device pointer storing matrix A.
+of dimension ( lda, k ), where k is m
+when  rocblas_side_left  and
+is  n  when  rocblas_side_right
+only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+if side = rocblas_side_right, lda >= max( 1, n ).
+
+@param[in,out]
+B       device pointer storing matrix B.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of B. ldb >= max( 1, m ).
+*/
+    pub fn rocblas_strsm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *mut f32,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f64 counterpart of rocblas_strsm; parameters are documented on rocblas_strsm.
+    #[must_use]
+    pub fn rocblas_dtrsm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *mut f64,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_float_complex counterpart of rocblas_strsm; parameters are documented on rocblas_strsm.
+    #[must_use]
+    pub fn rocblas_ctrsm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *mut rocblas_float_complex,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_double_complex counterpart of rocblas_strsm; parameters are documented on rocblas_strsm.
+    #[must_use]
+    pub fn rocblas_ztrsm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *mut rocblas_double_complex,
+        ldb: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_strsm, with m, n, lda and ldb typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_strsm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *mut f32,
+        ldb: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_dtrsm, with m, n, lda and ldb typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dtrsm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *mut f64,
+        ldb: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_ctrsm, with m, n, lda and ldb typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_ctrsm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *mut rocblas_float_complex,
+        ldb: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_ztrsm, with m, n, lda and ldb typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_ztrsm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *mut rocblas_double_complex,
+        ldb: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trsm_batched performs the following batched operation:
+
+op(A_i)*X_i = alpha*B_i or
+X_i*op(A_i) = alpha*B_i, for i = 1, ..., batch_count,
+
+where alpha is a scalar, X and B are batched m by n matrices,
+
+A is a triangular batched matrix and op(A) is one of
+
+op( A ) = A   or
+op( A ) = A^T   or
+op( A ) = A^H.
+
+Each matrix X_i is overwritten on B_i for i = 1, ..., batch_count.
+
+Note about memory allocation:
+When trsm is launched with a k evenly divisible by the internal block size of 128,
+and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated
+memory found in the handle to increase overall performance. This memory can be managed by using
+the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory
+used for temporary storage will default to 1 MB and may result in chunking, which in turn may
+reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set
+to the desired chunk of right hand sides to be used at a time
+(where k is m when rocblas_side_left and is n when rocblas_side_right).
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+side    [rocblas_side]
+- rocblas_side_left:       op(A)*X = alpha*B
+- rocblas_side_right:      X*op(A) = alpha*B
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  each A_i is an upper triangular matrix.
+- rocblas_fill_lower:  each A_i is a  lower triangular matrix.
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none:    op(A) = A
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of each B_i. m >= 0.
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of each B_i. n >= 0.
+@param[in]
+alpha
+device pointer or host pointer specifying the scalar alpha. When alpha is
+&zero then A is not referenced and B need not be set before
+entry.
+@param[in]
+A       device array of device pointers storing each matrix A_i on the GPU.
+Matrices are of dimension ( lda, k ), where k is m
+when  rocblas_side_left  and is  n  when  rocblas_side_right
+only the upper/lower triangular part is accessed.
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of each A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+if side = rocblas_side_right, lda >= max( 1, n ).
+@param[in,out]
+B       device array of device pointers storing each matrix B_i on the GPU.
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of each B_i. ldb >= max( 1, m ).
+@param[in]
+batch_count [rocblas_int]
+number of trsm operations in the batch.*/
+    pub fn rocblas_strsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *mut f32,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f64 counterpart of rocblas_strsm_batched; parameters are documented on rocblas_strsm_batched.
+    #[must_use]
+    pub fn rocblas_dtrsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *mut f64,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_float_complex counterpart of rocblas_strsm_batched; parameters are documented there.
+    #[must_use]
+    pub fn rocblas_ctrsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *mut rocblas_float_complex,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_double_complex counterpart of rocblas_strsm_batched; parameters are documented there.
+    #[must_use]
+    pub fn rocblas_ztrsm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *mut rocblas_double_complex,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_strsm_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_strsm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *mut f32,
+        ldb: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_dtrsm_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_dtrsm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *mut f64,
+        ldb: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_ctrsm_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_ctrsm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *mut rocblas_float_complex,
+        ldb: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_ztrsm_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_ztrsm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *mut rocblas_double_complex,
+        ldb: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+trsm_strided_batched performs the following strided batched operation:
+
+op(A_i)*X_i = alpha*B_i or
+X_i*op(A_i) = alpha*B_i, for i = 1, ..., batch_count,
+
+where alpha is a scalar, X and B are strided batched m by n matrices,
+
+A is a triangular strided batched matrix and op(A) is one of
+
+op( A ) = A   or
+op( A ) = A^T   or
+op( A ) = A^H.
+
+Each matrix X_i is overwritten on B_i for i = 1, ..., batch_count.
+
+Note about memory allocation:
+When trsm is launched with a k evenly divisible by the internal block size of 128,
+and is no larger than 10 of these blocks, the API takes advantage of utilizing pre-allocated
+memory found in the handle to increase overall performance. This memory can be managed by using
+the environment variable WORKBUF_TRSM_B_CHNK. When this variable is not set the device memory
+used for temporary storage will default to 1 MB and may result in chunking, which in turn may
+reduce performance. Under these circumstances it is recommended that WORKBUF_TRSM_B_CHNK be set
+to the desired chunk of right hand sides to be used at a time
+(where k is m when rocblas_side_left and is n when rocblas_side_right).
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+side    [rocblas_side]
+- rocblas_side_left:       op(A)*X = alpha*B.
+- rocblas_side_right:      X*op(A) = alpha*B.
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  each A_i is an upper triangular matrix.
+- rocblas_fill_lower:  each A_i is a  lower triangular matrix.
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:      op(A) = A^T.
+- rocblas_operation_conjugate_transpose:  op(A) = A^H.
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of each B_i. m >= 0.
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of each B_i. n >= 0.
+@param[in]
+alpha
+device pointer or host pointer specifying the scalar alpha. When alpha is
+&zero then A is not referenced and B need not be set before
+entry.
+@param[in]
+A       device pointer pointing to the first matrix A_1.
+of dimension ( lda, k ), where k is m
+when  rocblas_side_left  and
+is  n  when  rocblas_side_right
+only the upper/lower triangular part is accessed.
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of each A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ).
+if side = rocblas_side_right, lda >= max( 1, n ).
+@param[in]
+stride_a [rocblas_stride]
+stride from the start of one A_i matrix to the next A_(i + 1).
+@param[in,out]
+B       device pointer pointing to the first matrix B_1.
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of each B_i. ldb >= max( 1, m ).
+@param[in]
+stride_b [rocblas_stride]
+stride from the start of one B_i matrix to the next B_(i + 1).
+@param[in]
+batch_count [rocblas_int]
+number of trsm operations in the batch.*/
+    pub fn rocblas_strsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *mut f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f64 counterpart of rocblas_strsm_strided_batched; parameters are documented there.
+    #[must_use]
+    pub fn rocblas_dtrsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *mut f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_float_complex counterpart of rocblas_strsm_strided_batched; parameters are documented there.
+    #[must_use]
+    pub fn rocblas_ctrsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *mut rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_double_complex counterpart of rocblas_strsm_strided_batched; parameters are documented there.
+    #[must_use]
+    pub fn rocblas_ztrsm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *mut rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_strsm_strided_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_strsm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *mut f32,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_dtrsm_strided_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_dtrsm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *mut f64,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_ctrsm_strided_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_ctrsm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *mut rocblas_float_complex,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_ztrsm_strided_batched, with m, n, lda, ldb and batch_count typed i64.
+    #[must_use]
+    pub fn rocblas_ztrsm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *mut rocblas_double_complex,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_half form; the doc comment below speaks for the gemm_kernel_name functions as a group.
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+The gemm_kernel_name functions were never fully implemented and are deprecated for removal in a future release.
+
+Returns rocblas_status_not_implemented.*/
+    pub fn rocblas_hgemm_kernel_name(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_half,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f32 counterpart of rocblas_hgemm_kernel_name; see the doc comment on that declaration.
+    #[must_use]
+    pub fn rocblas_sgemm_kernel_name(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f64 counterpart of rocblas_hgemm_kernel_name; see the doc comment on that declaration.
+    #[must_use]
+    pub fn rocblas_dgemm_kernel_name(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+gemm performs one of the matrix-matrix operations:
+
+C = alpha*op( A )*op( B ) + beta*C,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars, and A, B and C are matrices, with
+op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+
+Although not widespread, some gemm kernels may use atomic operations. See Atomic Operations
+in the API Reference Guide for more information.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+number of rows of matrices op( A ) and C.
+@param[in]
+n         [rocblas_int]
+number of columns of matrices op( B ) and C.
+@param[in]
+k         [rocblas_int]
+number of columns of matrix op( A ) and number of rows of matrix op( B ).
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+B         device pointer storing matrix B.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B.
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in, out]
+C         device pointer storing matrix C on the GPU.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+*/
+    pub fn rocblas_sgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f64 counterpart of rocblas_sgemm; parameters are documented on rocblas_sgemm.
+    #[must_use]
+    pub fn rocblas_dgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_half counterpart of rocblas_sgemm; parameters are documented on rocblas_sgemm.
+    #[must_use]
+    pub fn rocblas_hgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        B: *const rocblas_half,
+        ldb: rocblas_int,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_float_complex counterpart of rocblas_sgemm; parameters are documented on rocblas_sgemm.
+    #[must_use]
+    pub fn rocblas_cgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // rocblas_double_complex counterpart of rocblas_sgemm; parameters are documented on rocblas_sgemm.
+    #[must_use]
+    pub fn rocblas_zgemm(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_sgemm, with m, n, k, lda, ldb and ldc typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_sgemm_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_dgemm, with m, n, k, lda, ldb and ldc typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dgemm_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_hgemm, with m, n, k, lda, ldb and ldc typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_hgemm_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: i64,
+        B: *const rocblas_half,
+        ldb: i64,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_cgemm, with m, n, k, lda, ldb and ldc typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_cgemm_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // Same parameter list as rocblas_zgemm, with m, n, k, lda, ldb and ldc typed i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_zgemm_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+gemm_batched performs one of the batched matrix-matrix operations:
+
+C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars, and A, B and C are batched matrices, with
+
+op( A ) an m by k by batch_count matrices,
+op( B ) a k by n by batch_count matrices and
+C an m by n by batch_count matrices.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+k         [rocblas_int]
+matrix dimension k.
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device array of device pointers storing each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+B         device array of device pointers storing each matrix B_i.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of each B_i.
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in, out]
+C         device array of device pointers storing each matrix C_i.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of each C_i.
+@param[in]
+batch_count
+[rocblas_int]
+number of gemm operations in the batch.*/
+    pub fn rocblas_sgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" { // f64 counterpart of rocblas_sgemm_batched; parameters are documented on rocblas_sgemm_batched.
+    #[must_use]
+    pub fn rocblas_dgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_batched` with `rocblas_half` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_hgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const *const rocblas_half,
+        lda: rocblas_int,
+        B: *const *const rocblas_half,
+        ldb: rocblas_int,
+        beta: *const rocblas_half,
+        C: *const *mut rocblas_half,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_batched` with `rocblas_float_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_cgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_batched` with `rocblas_double_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_zgemm_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_batched` with `i64` replacing every `rocblas_int`; parameters are documented there. */
+    pub fn rocblas_sgemm_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_dgemm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_dgemm_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_hgemm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_hgemm_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_half,
+        A: *const *const rocblas_half,
+        lda: i64,
+        B: *const *const rocblas_half,
+        ldb: i64,
+        beta: *const rocblas_half,
+        C: *const *mut rocblas_half,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_cgemm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_cgemm_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_zgemm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_zgemm_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+gemm_strided_batched performs one of the strided batched matrix-matrix operations:
+
+C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars, and A, B and C are strided batched matrices, with
+op( A ) an m by k by batch_count strided_batched matrix,
+op( B ) a k by n by batch_count strided_batched matrix and
+C an m by n by batch_count strided_batched matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+k         [rocblas_int]
+matrix dimension k.
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device pointer pointing to the first matrix A_1.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+stride_a  [rocblas_stride]
+stride from the start of one A_i matrix to the next A_(i + 1).
+@param[in]
+B         device pointer pointing to the first matrix B_1.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of each B_i.
+@param[in]
+stride_b  [rocblas_stride]
+stride from the start of one B_i matrix to the next B_(i + 1).
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in, out]
+C         device pointer pointing to the first matrix C_1.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of each C_i.
+@param[in]
+stride_c  [rocblas_stride]
+stride from the start of one C_i matrix to the next C_(i + 1).
+@param[in]
+batch_count
+[rocblas_int]
+number of gemm operations in the batch.
+*/
+    pub fn rocblas_sgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_strided_batched` with `f64` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_dgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_strided_batched` with `rocblas_half` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_hgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_half,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_strided_batched` with `rocblas_float_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_cgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_strided_batched` with `rocblas_double_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_zgemm_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sgemm_strided_batched` with `i64` replacing every `rocblas_int`; parameters are documented there. */
+    pub fn rocblas_sgemm_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_dgemm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_dgemm_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_hgemm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_hgemm_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_half,
+        A: *const rocblas_half,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const rocblas_half,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_half,
+        C: *mut rocblas_half,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_cgemm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_cgemm_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_zgemm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_zgemm_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+dgmm performs one of the matrix-matrix operations:
+
+C = A * diag(x) if side == rocblas_side_right
+C = diag(x) * A if side == rocblas_side_left
+
+where C and A are m by n dimensional matrices. diag( x ) is a diagonal matrix
+and x is a vector of dimension n if side == rocblas_side_right and dimension m
+if side == rocblas_side_left.
+
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+side      [rocblas_side]
+specifies the side of diag(x).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment between values of x.
+@param[in, out]
+C         device pointer storing matrix C.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+*/
+    pub fn rocblas_sdgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        x: *const f32,
+        incx: rocblas_int,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm` with `f64` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_ddgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        x: *const f64,
+        incx: rocblas_int,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm` with `rocblas_float_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_cdgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm` with `rocblas_double_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_zdgmm(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm` with `i64` replacing every `rocblas_int`; parameters are documented there. */
+    pub fn rocblas_sdgmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const f32,
+        lda: i64,
+        x: *const f32,
+        incx: i64,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_ddgmm` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_ddgmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const f64,
+        lda: i64,
+        x: *const f64,
+        incx: i64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_cdgmm` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_cdgmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_zdgmm` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_zdgmm_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+dgmm_batched performs one of the batched matrix-matrix operations:
+
+C_i = A_i * diag(x_i) for i = 0, 1, ... batch_count-1 if side == rocblas_side_right
+C_i = diag(x_i) * A_i for i = 0, 1, ... batch_count-1 if side == rocblas_side_left,
+
+where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix
+and x_i is a vector of dimension n if side == rocblas_side_right and dimension m
+if side == rocblas_side_left.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+side      [rocblas_side]
+specifies the side of diag(x).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+A         device array of device pointers storing each matrix A_i on the GPU.
+Each A_i is of dimension ( lda, n ).
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A_i.
+@param[in]
+x         device array of device pointers storing each vector x_i on the GPU.
+Each x_i is of dimension n if side == rocblas_side_right and dimension
+m if side == rocblas_side_left.
+@param[in]
+incx      [rocblas_int]
+specifies the increment between values of x_i.
+@param[in, out]
+C         device array of device pointers storing each matrix C_i on the GPU.
+Each C_i is of dimension ( ldc, n ).
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+*/
+    pub fn rocblas_sdgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const f32,
+        lda: rocblas_int,
+        x: *const *const f32,
+        incx: rocblas_int,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_batched` with `f64` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_ddgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const f64,
+        lda: rocblas_int,
+        x: *const *const f64,
+        incx: rocblas_int,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_batched` with `rocblas_float_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_cdgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_float_complex,
+        incx: rocblas_int,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_batched` with `rocblas_double_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_zdgmm_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        x: *const *const rocblas_double_complex,
+        incx: rocblas_int,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_batched` with `i64` replacing every `rocblas_int`; parameters are documented there. */
+    pub fn rocblas_sdgmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const *const f32,
+        lda: i64,
+        x: *const *const f32,
+        incx: i64,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_ddgmm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_ddgmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const *const f64,
+        lda: i64,
+        x: *const *const f64,
+        incx: i64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_cdgmm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_cdgmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        x: *const *const rocblas_float_complex,
+        incx: i64,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_zdgmm_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_zdgmm_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        x: *const *const rocblas_double_complex,
+        incx: i64,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+dgmm_strided_batched performs one of the batched matrix-matrix operations:
+
+C_i = A_i * diag(x_i)   if side == rocblas_side_right   for i = 0, 1, ... batch_count-1
+C_i = diag(x_i) * A_i   if side == rocblas_side_left    for i = 0, 1, ... batch_count-1,
+
+where C_i and A_i are m by n dimensional matrices. diag(x_i) is a diagonal matrix
+and x_i is a vector of dimension n if side == rocblas_side_right and dimension m
+if side == rocblas_side_left.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+side      [rocblas_side]
+specifies the side of diag(x).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+A         device pointer to the first matrix A_0 on the GPU.
+Each A_i is of dimension ( lda, n ).
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) and the next one (A_i+1).
+@param[in]
+x         pointer to the first vector x_0 on the GPU.
+Each x_i is of dimension n if side == rocblas_side_right and dimension
+m if side == rocblas_side_left.
+@param[in]
+incx      [rocblas_int]
+specifies the increment between values of x.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) and the next one (x_i+1).
+@param[in, out]
+C         device pointer to the first matrix C_0 on the GPU.
+Each C_i is of dimension ( ldc, n ).
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) and the next one (C_i+1).
+@param[in]
+batch_count [rocblas_int]
+number of instances i in the batch.
+*/
+    pub fn rocblas_sdgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f32,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_strided_batched` with `f64` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_ddgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const f64,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_strided_batched` with `rocblas_float_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_cdgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_strided_batched` with `rocblas_double_complex` in place of `f32`; parameters are documented there. */
+    pub fn rocblas_zdgmm_strided_batched(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: rocblas_int,
+        n: rocblas_int,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_sdgmm_strided_batched` with `i64` replacing every `rocblas_int`; parameters are documented there. */
+    pub fn rocblas_sdgmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const f32,
+        incx: i64,
+        stride_x: rocblas_stride,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_ddgmm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_ddgmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const f64,
+        incx: i64,
+        stride_x: rocblas_stride,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_cdgmm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_cdgmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_float_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use] /** `rocblas_zdgmm_strided_batched` with `i64` replacing every `rocblas_int`. */
+    pub fn rocblas_zdgmm_strided_batched_64(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        m: i64,
+        n: i64,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        x: *const rocblas_double_complex,
+        incx: i64,
+        stride_x: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+geam performs one of the matrix-matrix operations:
+
+C = alpha*op( A ) + beta*op( B ),
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars, and A, B and C are matrices, with
+op( A ) an m by n matrix, op( B ) an m by n matrix, and C an m by n matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device pointer storing matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in]
+B         device pointer storing matrix B.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B.
+@param[in, out]
+C         device pointer storing matrix C.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+*/
+    pub fn rocblas_sgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        beta: *const f32, // NOTE: beta precedes B here, whereas the gemm declarations take beta after B/ldb.
+        B: *const f32,
+        ldb: rocblas_int,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        beta: *const f64,
+        B: *const f64,
+        ldb: rocblas_int,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgeam_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        beta: *const f32,
+        B: *const f32,
+        ldb: i64,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        beta: *const f64,
+        B: *const f64,
+        ldb: i64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        beta: *const rocblas_float_complex,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        beta: *const rocblas_double_complex,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+geam_batched performs one of the batched matrix-matrix operations:
+
+C_i = alpha*op( A_i ) + beta*op( B_i )  for i = 0, 1, ... batch_count - 1,
+
+where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices
+and op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device array of device pointers storing each matrix A_i on the GPU.
+Each A_i is of dimension ( lda, k ), where k is n
+when  transA == rocblas_operation_none and
+is  m  when  transA == rocblas_operation_transpose.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in]
+B         device array of device pointers storing each matrix B_i on the GPU.
+Each B_i is of dimension ( ldb, k ), where k is n
+when  transB == rocblas_operation_none and
+is  m  when  transB == rocblas_operation_transpose.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B.
+@param[in, out]
+C         device array of device pointers storing each matrix C_i on the GPU.
+Each C_i is of dimension ( ldc, n ).
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+
+@param[in]
+batch_count [rocblas_int]
+number of instances i in the batch.
+*/
+    pub fn rocblas_sgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        beta: *const f32,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        beta: *const f64,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_float_complex,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        beta: *const rocblas_double_complex,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgeam_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        beta: *const f32,
+        B: *const *const f32,
+        ldb: i64,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        beta: *const f64,
+        B: *const *const f64,
+        ldb: i64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        beta: *const rocblas_float_complex,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        beta: *const rocblas_double_complex,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+geam_strided_batched performs one of the batched matrix-matrix operations:
+
+C_i = alpha*op( A_i ) + beta*op( B_i )  for i = 0, 1, ... batch_count - 1,
+
+where alpha and beta are scalars, and op(A_i), op(B_i) and C_i are m by n matrices
+and op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+
+@param[in]
+A         device pointer to the first matrix A_0 on the GPU.
+Each A_i is of dimension ( lda, k ), where k is n
+when  transA == rocblas_operation_none and
+is  m  when  transA == rocblas_operation_transpose.
+
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+
+@param[in]
+stride_A  [rocblas_stride]
+stride from the start of one matrix (A_i) to the start of the next one (A_(i+1)).
+
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+
+@param[in]
+B         device pointer to the first matrix B_0 on the GPU.
+Each B_i is of dimension ( ldb, k ), where k is n
+when  transB == rocblas_operation_none and
+is  m  when  transB == rocblas_operation_transpose.
+
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B.
+
+@param[in]
+stride_B  [rocblas_stride]
+stride from the start of one matrix (B_i) to the start of the next one (B_(i+1)).
+
+@param[in, out]
+C         pointer to the first matrix C_0 on the GPU.
+Each C_i is of dimension ( ldc, n ).
+
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+
+@param[in]
+stride_C  [rocblas_stride]
+stride from the start of one matrix (C_i) to the start of the next one (C_(i+1)).
+
+@param[in]
+batch_count [rocblas_int]
+number of instances i in the batch.
+*/
+    pub fn rocblas_sgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam_strided_batched(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_C: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_sgeam_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const f32,
+        B: *const f32,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut f32,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_dgeam_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const f64,
+        B: *const f64,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut f64,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_cgeam_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_zgeam_strided_batched_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_A: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_B: rocblas_stride,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_C: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+gemm_ex performs one of the matrix-matrix operations:
+
+D = alpha*op( A )*op( B ) + beta*C,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars, and A, B, C, and D are matrices, with
+op( A ) an m by k matrix, op( B ) a k by n matrix and C and D are m by n matrices.
+C and D may point to the same matrix if their parameters are identical.
+
+Supported types are as follows:
+- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =
+compute_type
+- rocblas_datatype_f16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =
+compute_type
+- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =
+compute_type
+- rocblas_datatype_bf16_r = a_type = b_type; rocblas_datatype_f32_r = c_type = d_type =
+compute_type
+- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =
+compute_type
+- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type
+
+Although not widespread, some gemm kernels used by gemm_ex may use atomic operations.
+See Atomic Operations in the API Reference Guide for more information.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+k         [rocblas_int]
+matrix dimension k.
+@param[in]
+alpha     [const void *]
+device pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.
+@param[in]
+a         [void *]
+device pointer storing matrix A.
+@param[in]
+a_type    [rocblas_datatype]
+specifies the datatype of matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+@param[in]
+b         [void *]
+device pointer storing matrix B.
+@param[in]
+b_type    [rocblas_datatype]
+specifies the datatype of matrix B.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B.
+@param[in]
+beta      [const void *]
+device pointer or host pointer specifying the scalar beta. Same datatype as compute_type.
+@param[in]
+c         [void *]
+device pointer storing matrix C.
+@param[in]
+c_type    [rocblas_datatype]
+specifies the datatype of matrix C.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C.
+@param[out]
+d         [void *]
+device pointer storing matrix D.
+If d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc
+or the respective invalid status will be returned.
+@param[in]
+d_type    [rocblas_datatype]
+specifies the datatype of matrix D.
+@param[in]
+ldd       [rocblas_int]
+specifies the leading dimension of D.
+@param[in]
+compute_type
+[rocblas_datatype]
+specifies the datatype of computation.
+@param[in]
+algo      [rocblas_gemm_algo]
+enumerant specifying the algorithm type.
+@param[in]
+solution_index
+[int32_t]
+if algo is rocblas_gemm_algo_solution_index, this controls which solution is used.
+When algo is not rocblas_gemm_algo_solution_index, or if solution_index <= 0, the default solution is used.
+In previous releases this parameter was ignored and the default solution was always used.
+@param[in]
+flags     [uint32_t]
+optional gemm flags.
+*/
+    pub fn rocblas_gemm_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        a: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        b: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        beta: *const ::core::ffi::c_void,
+        c: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        d: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_gemm_ex_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const ::core::ffi::c_void,
+        a: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: i64,
+        b: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: i64,
+        beta: *const ::core::ffi::c_void,
+        c: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: i64,
+        d: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: i64,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+gemm_batched_ex performs one of the batched matrix-matrix operations:
+D_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count,
+where op( X ) is one of
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+alpha and beta are scalars, and A, B, C, and D are batched pointers to matrices, with
+op( A ) an m by k by batch_count batched matrix,
+op( B ) a k by n by batch_count batched matrix and
+C and D are m by n by batch_count batched matrices.
+The batched matrices are an array of pointers to matrices.
+The number of pointers to matrices is batch_count.
+C and D may point to the same matrices if their parameters are identical.
+
+Supported types are as follows:
+- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =
+compute_type
+- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =
+compute_type
+- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =
+compute_type
+- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+k         [rocblas_int]
+matrix dimension k.
+@param[in]
+alpha     [const void *]
+device pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.
+@param[in]
+a         [void *]
+device pointer storing array of pointers to each matrix A_i.
+@param[in]
+a_type    [rocblas_datatype]
+specifies the datatype of each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+b         [void *]
+device pointer storing array of pointers to each matrix B_i.
+@param[in]
+b_type    [rocblas_datatype]
+specifies the datatype of each matrix B_i.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of each B_i.
+@param[in]
+beta      [const void *]
+device pointer or host pointer specifying the scalar beta. Same datatype as compute_type.
+@param[in]
+c         [void *]
+device array of device pointers to each matrix C_i.
+@param[in]
+c_type    [rocblas_datatype]
+specifies the datatype of each matrix C_i.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of each C_i.
+@param[out]
+d         [void *]
+device array of device pointers to each matrix D_i.
+If d and c are the same array of matrix pointers then d_type must equal c_type and ldd must equal ldc
+or the respective invalid status will be returned.
+@param[in]
+d_type    [rocblas_datatype]
+specifies the datatype of each matrix D_i.
+@param[in]
+ldd       [rocblas_int]
+specifies the leading dimension of each D_i.
+@param[in]
+batch_count
+[rocblas_int]
+number of gemm operations in the batch.
+@param[in]
+compute_type
+[rocblas_datatype]
+specifies the datatype of computation.
+@param[in]
+algo      [rocblas_gemm_algo]
+enumerant specifying the algorithm type.
+@param[in]
+solution_index
+[int32_t]
+if algo is rocblas_gemm_algo_solution_index, this controls which solution is used.
+When algo is not rocblas_gemm_algo_solution_index, or if solution_index <= 0, the default solution is used.
+In previous releases this parameter was ignored and the default solution was always used.
+@param[in]
+flags     [uint32_t]
+optional gemm flags.
+*/
+    pub fn rocblas_gemm_batched_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        a: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        b: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        beta: *const ::core::ffi::c_void,
+        c: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        d: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        batch_count: rocblas_int,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_gemm_batched_ex_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const ::core::ffi::c_void,
+        a: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: i64,
+        b: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: i64,
+        beta: *const ::core::ffi::c_void,
+        c: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: i64,
+        d: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: i64,
+        batch_count: i64,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link = "rocblas", kind = "raw-dylib")]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+gemm_strided_batched_ex performs one of the strided_batched matrix-matrix operations:
+
+D_i = alpha*op(A_i)*op(B_i) + beta*C_i, for i = 1, ..., batch_count
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars, and A, B, C, and D are strided_batched matrices, with
+op( A ) an m by k by batch_count strided_batched matrix,
+op( B ) a k by n by batch_count strided_batched matrix and
+C and D are m by n by batch_count strided_batched matrices.
+C and D may point to the same matrices if their parameters are identical.
+
+The strided_batched matrices are multiple matrices separated by a constant stride.
+The number of matrices is batch_count.
+
+Supported types are as follows:
+- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =
+compute_type
+- rocblas_datatype_bf16_r = a_type = b_type = c_type = d_type; rocblas_datatype_f32_r =
+compute_type
+- rocblas_datatype_i8_r = a_type = b_type; rocblas_datatype_i32_r = c_type = d_type =
+compute_type
+- rocblas_datatype_f32_c  = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f64_c  = a_type = b_type = c_type = d_type = compute_type
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+k         [rocblas_int]
+matrix dimension k.
+@param[in]
+alpha     [const void *]
+device pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.
+@param[in]
+a         [void *]
+device pointer pointing to first matrix A_1.
+@param[in]
+a_type    [rocblas_datatype]
+specifies the datatype of each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i.
+@param[in]
+stride_a  [rocblas_stride]
+specifies stride from start of one A_i matrix to the next A_(i + 1).
+@param[in]
+b         [void *]
+device pointer pointing to first matrix B_1.
+@param[in]
+b_type    [rocblas_datatype]
+specifies the datatype of each matrix B_i.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of each B_i.
+@param[in]
+stride_b  [rocblas_stride]
+specifies stride from start of one B_i matrix to the next B_(i + 1).
+@param[in]
+beta      [const void *]
+device pointer or host pointer specifying the scalar beta. Same datatype as compute_type.
+@param[in]
+c         [void *]
+device pointer pointing to first matrix C_1.
+@param[in]
+c_type    [rocblas_datatype]
+specifies the datatype of each matrix C_i.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of each C_i.
+@param[in]
+stride_c  [rocblas_stride]
+specifies stride from start of one C_i matrix to the next C_(i + 1).
+@param[out]
+d         [void *]
+device pointer storing each matrix D_i.
+If d and c pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc
+and stride_d must equal stride_c or the respective invalid status will be returned.
+@param[in]
+d_type    [rocblas_datatype]
+specifies the datatype of each matrix D_i.
+@param[in]
+ldd       [rocblas_int]
+specifies the leading dimension of each D_i.
+@param[in]
+stride_d  [rocblas_stride]
+specifies stride from start of one D_i matrix to the next D_(i + 1).
+@param[in]
+batch_count
+[rocblas_int]
+number of gemm operations in the batch.
+@param[in]
+compute_type
+[rocblas_datatype]
+specifies the datatype of computation.
+@param[in]
+algo      [rocblas_gemm_algo]
+enumerant specifying the algorithm type.
+@param[in]
+solution_index
+[int32_t]
+if algo is rocblas_gemm_algo_solution_index, this controls which solution is used.
+When algo is not rocblas_gemm_algo_solution_index, or if solution_index <= 0, the default solution is used.
+This parameter was unused in previous releases and instead always used the default solution
+@param[in]
+flags     [uint32_t]
+optional gemm flags.
+*/
+    pub fn rocblas_gemm_strided_batched_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        a: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        b: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const ::core::ffi::c_void,
+        c: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        d: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        stride_d: rocblas_stride,
+        batch_count: rocblas_int,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_gemm_strided_batched_ex, but m, n, k, lda, ldb, ldc, ldd and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_gemm_strided_batched_ex_64(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: i64,
+        n: i64,
+        k: i64,
+        alpha: *const ::core::ffi::c_void,
+        a: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: i64,
+        stride_a: rocblas_stride,
+        b: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const ::core::ffi::c_void,
+        c: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        d: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: i64,
+        stride_d: rocblas_stride,
+        batch_count: i64,
+        compute_type: rocblas_datatype,
+        algo: rocblas_gemm_algo,
+        solution_index: i32,
+        flags: u32,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+gemmt performs matrix-matrix operations and updates the upper or lower triangular part of the result matrix:
+
+C = alpha*op( A )*op( B ) + beta*C,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars. A and B are general matrices and C is either an upper or lower triangular matrix, with
+op( A ) an n by k matrix, op( B ) a k by n matrix and C an n by n matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+@param[in]
+transA    [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+@param[in]
+transB    [rocblas_operation]
+- rocblas_operation_none:    op(B) = B.
+- rocblas_operation_transpose:      op(B) = B^T
+- rocblas_operation_conjugate_transpose:  op(B) = B^H
+@param[in]
+n         [rocblas_int]
+number of rows of matrices op( A ), columns of op( B ), and (rows, columns) of C.
+@param[in]
+k         [rocblas_int]
+number of rows of matrices op( B ) and columns of op( A ).
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device pointer storing matrix A. If transA = rocblas_operation_none, then, the leading n-by-k part of the array contains the matrix A, otherwise the leading k-by-n part of the array contains the matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A. If transA == rocblas_operation_none, must have lda >= max(1, n), otherwise, must have lda >= max(1, k).
+@param[in]
+B         device pointer storing matrix B. If transB = rocblas_operation_none, then, the leading k-by-n part of the array contains the matrix B, otherwise the leading n-by-k part of the array contains the matrix B.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B. If transB == rocblas_operation_none, must have ldb >= max(1, k), otherwise, must have ldb >= max(1, n).
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in, out]
+C         device pointer storing matrix C on the GPU. If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains the matrix C, otherwise the lower triangular part of the leading n-by-n array contains the matrix C.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C. Must have ldc >= max(1, n).
+*/
+    pub fn rocblas_sgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        B: *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f64 counterpart of rocblas_sgemmt: identical parameter list with f32 replaced by f64.
+    pub fn rocblas_dgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        B: *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex counterpart of rocblas_sgemmt: identical parameter list with f32 replaced by rocblas_float_complex.
+    pub fn rocblas_cgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_sgemmt: identical parameter list with f32 replaced by rocblas_double_complex.
+    pub fn rocblas_zgemmt(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_sgemmt, but n, k, lda, ldb and ldc are i64 rather than rocblas_int.
+    pub fn rocblas_sgemmt_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        B: *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_dgemmt, but n, k, lda, ldb and ldc are i64 rather than rocblas_int.
+    pub fn rocblas_dgemmt_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        B: *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_cgemmt, but n, k, lda, ldb and ldc are i64 rather than rocblas_int.
+    pub fn rocblas_cgemmt_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_zgemmt, but n, k, lda, ldb and ldc are i64 rather than rocblas_int.
+    pub fn rocblas_zgemmt_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+gemmt_batched performs matrix-matrix operations and updates the upper or lower triangular part of the result matrix:
+
+C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars. A_i and B_i are general matrices and C_i is either an upper or lower triangular matrix, with
+
+op( A_i ) an n by k matrix,
+op( B_i ) a k by n matrix and
+C_i an n by n matrix, for each of the batch_count instances in the batch.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+@param[in]
+transA    [rocblas_operation]
+- rocblas_operation_none:    op(A_i) = A_i.
+- rocblas_operation_transpose:      op(A_i) = A_i^T
+- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H
+@param[in]
+transB    [rocblas_operation]
+- rocblas_operation_none:    op(B_i) = B_i.
+- rocblas_operation_transpose:      op(B_i) = B_i^T
+- rocblas_operation_conjugate_transpose:  op(B_i) = B_i^H
+@param[in]
+n         [rocblas_int]
+number of rows of matrices op( A_i ), columns of op( B_i ), and (rows, columns) of C_i.
+@param[in]
+k         [rocblas_int]
+number of rows of matrices op( B_i ) and columns of op( A_i ).
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device array of device pointers storing each matrix A_i. If transA = rocblas_operation_none, then, the leading n-by-k part of the array contains each matrix A_i, otherwise the leading k-by-n part of the array contains each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. If transA == rocblas_operation_none, must have lda >= max(1, n), otherwise, must have lda >= max(1, k).
+@param[in]
+B         device array of device pointers storing each matrix B_i. If transB = rocblas_operation_none, then, the leading k-by-n part of the array contains each matrix B_i, otherwise the leading n-by-k part of the array contains each matrix B_i.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of each B_i. If transB == rocblas_operation_none, must have ldb >= max(1, k), otherwise, must have ldb >= max(1, n).
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in, out]
+C         device array of device pointers storing each matrix C_i. If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains each matrix C_i, otherwise the lower triangular part of the leading n-by-n array contains each matrix C_i.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of each C_i. Must have ldc >= max(1, n).
+@param[in]
+batch_count
+[rocblas_int]
+number of gemm operations in the batch.*/
+    pub fn rocblas_sgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: rocblas_int,
+        B: *const *const f32,
+        ldb: rocblas_int,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f64 counterpart of rocblas_sgemmt_batched: identical parameter list with f32 replaced by f64.
+    pub fn rocblas_dgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: rocblas_int,
+        B: *const *const f64,
+        ldb: rocblas_int,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex counterpart of rocblas_sgemmt_batched: identical parameter list with f32 replaced by rocblas_float_complex.
+    pub fn rocblas_cgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_float_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_sgemmt_batched: identical parameter list with f32 replaced by rocblas_double_complex.
+    pub fn rocblas_zgemmt_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: rocblas_int,
+        B: *const *const rocblas_double_complex,
+        ldb: rocblas_int,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_sgemmt_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_sgemmt_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const *const f32,
+        lda: i64,
+        B: *const *const f32,
+        ldb: i64,
+        beta: *const f32,
+        C: *const *mut f32,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_dgemmt_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_dgemmt_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const *const f64,
+        lda: i64,
+        B: *const *const f64,
+        ldb: i64,
+        beta: *const f64,
+        C: *const *mut f64,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_cgemmt_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_cgemmt_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const *const rocblas_float_complex,
+        lda: i64,
+        B: *const *const rocblas_float_complex,
+        ldb: i64,
+        beta: *const rocblas_float_complex,
+        C: *const *mut rocblas_float_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_zgemmt_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_zgemmt_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const *const rocblas_double_complex,
+        lda: i64,
+        B: *const *const rocblas_double_complex,
+        ldb: i64,
+        beta: *const rocblas_double_complex,
+        C: *const *mut rocblas_double_complex,
+        ldc: i64,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS Level 3 API </b>
+
+\details
+gemmt_strided_batched performs matrix-matrix operations and updates the upper or lower triangular part of the result matrix:
+
+C_i = alpha*op( A_i )*op( B_i ) + beta*C_i, for i = 1, ..., batch_count,
+
+where op( X ) is one of
+
+op( X ) = X      or
+op( X ) = X**T   or
+op( X ) = X**H,
+
+alpha and beta are scalars. A and B are general matrices and C is either an upper or lower triangular matrix, with
+op( A ) an n by k by batch_count strided_batched matrix,
+op( B ) a k by n by batch_count strided_batched matrix and
+C an n by n by batch_count strided_batched matrix.
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  C is an upper triangular matrix
+- rocblas_fill_lower:  C is a  lower triangular matrix
+@param[in]
+transA    [rocblas_operation]
+- rocblas_operation_none:    op(A_i) = A_i.
+- rocblas_operation_transpose:      op(A_i) = A_i^T
+- rocblas_operation_conjugate_transpose:  op(A_i) = A_i^H
+@param[in]
+transB    [rocblas_operation]
+- rocblas_operation_none:    op(B_i) = B_i.
+- rocblas_operation_transpose:      op(B_i) = B_i^T
+- rocblas_operation_conjugate_transpose:  op(B_i) = B_i^H
+@param[in]
+n         [rocblas_int]
+number of rows of matrices op( A_i ), columns of op( B_i ), and (rows, columns) of C_i.
+@param[in]
+k         [rocblas_int]
+number of rows of matrices op( B_i ) and columns of op( A_i ).
+@param[in]
+alpha     device pointer or host pointer specifying the scalar alpha.
+@param[in]
+A         device pointer to the first matrix A_1. If transA = rocblas_operation_none, then, the leading n-by-k part of the array contains each matrix A_i, otherwise the leading k-by-n part of the array contains each matrix A_i.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of each A_i. If transA == rocblas_operation_none, must have lda >= max(1, n), otherwise, must have lda >= max(1, k).
+@param[in]
+stride_a  [rocblas_stride]
+stride from the start of one A_i matrix to the next A_(i + 1).
+@param[in]
+B         device pointer to the first matrix B_1. If transB = rocblas_operation_none, then, the leading k-by-n part of the array contains each matrix B_i, otherwise the leading n-by-k part of the array contains each matrix B_i.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of each B_i. If transB == rocblas_operation_none, must have ldb >= max(1, k), otherwise, must have ldb >= max(1, n).
+@param[in]
+stride_b  [rocblas_stride]
+stride from the start of one B_i matrix to the next B_(i + 1).
+@param[in]
+beta      device pointer or host pointer specifying the scalar beta.
+@param[in, out]
+C         device pointer to the first matrix C_1. If uplo == rocblas_fill_upper, the upper triangular part of the leading n-by-n array contains each matrix C_i, otherwise the lower triangular part of the leading n-by-n array contains each matrix C_i.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of each C_i. Must have ldc >= max(1, n).
+@param[in]
+stride_c  [rocblas_stride]
+stride from the start of one C_i matrix to the next C_(i + 1).
+@param[in]
+batch_count
+[rocblas_int]
+number of gemm operations in the batch.
+*/
+    pub fn rocblas_sgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f32,
+        A: *const f32,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // f64 counterpart of rocblas_sgemmt_strided_batched: identical parameter list with f32 replaced by f64.
+    pub fn rocblas_dgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const f64,
+        A: *const f64,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_float_complex counterpart of rocblas_sgemmt_strided_batched: identical parameter list with f32 replaced by rocblas_float_complex.
+    pub fn rocblas_cgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // rocblas_double_complex counterpart of rocblas_sgemmt_strided_batched: identical parameter list with f32 replaced by rocblas_double_complex.
+    pub fn rocblas_zgemmt_strided_batched(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: rocblas_int,
+        stride_a: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: rocblas_int,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: rocblas_int,
+        stride_c: rocblas_stride,
+        batch_count: rocblas_int,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_sgemmt_strided_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_sgemmt_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f32,
+        A: *const f32,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const f32,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const f32,
+        C: *mut f32,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_dgemmt_strided_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_dgemmt_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const f64,
+        A: *const f64,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const f64,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const f64,
+        C: *mut f64,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_cgemmt_strided_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_cgemmt_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_float_complex,
+        A: *const rocblas_float_complex,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const rocblas_float_complex,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_float_complex,
+        C: *mut rocblas_float_complex,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] // Mirrors rocblas_zgemmt_strided_batched, but n, k, lda, ldb, ldc and batch_count are i64 rather than rocblas_int.
+    pub fn rocblas_zgemmt_strided_batched_64(
+        handle: rocblas_handle,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        n: i64,
+        k: i64,
+        alpha: *const rocblas_double_complex,
+        A: *const rocblas_double_complex,
+        lda: i64,
+        stride_a: rocblas_stride,
+        B: *const rocblas_double_complex,
+        ldb: i64,
+        stride_b: rocblas_stride,
+        beta: *const rocblas_double_complex,
+        C: *mut rocblas_double_complex,
+        ldc: i64,
+        stride_c: rocblas_stride,
+        batch_count: i64,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+geam_ex performs one of the matrix-matrix operations:
+
+Dij = min(alpha * (Aik + Bkj), beta * Cij)
+Dij = min(alpha * Aik, alpha * Bkj) + beta * Cij
+
+alpha and beta are scalars, and A, B, C, and D are matrices, with
+op( A ) an m by k matrix, op( B ) a k by n matrix and C and D are m by n matrices.
+C and D may point to the same matrix if their type and leading dimensions are identical.
+
+Aik refers to the element at the i-th row and k-th column of op( A ), Bkj refers to
+the element at the k-th row and j-th column of op( B ), and Cij/Dij refers to the element
+at the i-th row and j-th column of C/D.
+
+Supported types are as follows:
+- rocblas_datatype_f64_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f32_r = a_type = b_type = c_type = d_type = compute_type
+- rocblas_datatype_f16_r = a_type = b_type = c_type = d_type = compute_type
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+transA    [rocblas_operation]
+specifies the form of op( A ).
+@param[in]
+transB    [rocblas_operation]
+specifies the form of op( B ).
+@param[in]
+m         [rocblas_int]
+matrix dimension m.
+@param[in]
+n         [rocblas_int]
+matrix dimension n.
+@param[in]
+k         [rocblas_int]
+matrix dimension k.
+@param[in]
+alpha     [const void *]
+device pointer or host pointer specifying the scalar alpha. Same datatype as compute_type.
+@param[in]
+A         [void *]
+device pointer storing matrix A.
+@param[in]
+a_type    [rocblas_datatype]
+specifies the datatype of matrix A.
+@param[in]
+lda       [rocblas_int]
+specifies the leading dimension of A.
+
+if transA == N, must have lda >= max(1, m),
+otherwise, must have lda >= max(1, k).
+@param[in]
+B         [void *]
+device pointer storing matrix B.
+@param[in]
+b_type    [rocblas_datatype]
+specifies the datatype of matrix B.
+@param[in]
+ldb       [rocblas_int]
+specifies the leading dimension of B.
+
+if transB == N, must have ldb >= max(1, k),
+otherwise, must have ldb >= max(1, n).
+@param[in]
+beta      [const void *]
+device pointer or host pointer specifying the scalar beta. Same datatype as compute_type.
+@param[in]
+C         [void *]
+device pointer storing matrix C.
+@param[in]
+c_type    [rocblas_datatype]
+specifies the datatype of matrix C.
+@param[in]
+ldc       [rocblas_int]
+specifies the leading dimension of C, must have ldc >= max(1, m).
+@param[out]
+D         [void *]
+device pointer storing matrix D.
+If D and C pointers are to the same matrix then d_type must equal c_type and ldd must equal ldc
+or the respective invalid status will be returned.
+@param[in]
+d_type    [rocblas_datatype]
+specifies the datatype of matrix D.
+@param[in]
+ldd       [rocblas_int]
+specifies the leading dimension of D, must have ldd >= max(1, m).
+@param[in]
+compute_type
+[rocblas_datatype]
+specifies the datatype of computation.
+@param[in]
+geam_ex_op [rocblas_geam_ex_operation]
+enumerant specifying the operation type; rocblas_geam_ex_operation_min_plus and rocblas_geam_ex_operation_plus_min are supported.
+*/
+    pub fn rocblas_geam_ex(
+        handle: rocblas_handle,
+        transA: rocblas_operation,
+        transB: rocblas_operation,
+        m: rocblas_int,
+        n: rocblas_int,
+        k: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        A: *const ::core::ffi::c_void,
+        a_type: rocblas_datatype,
+        lda: rocblas_int,
+        B: *const ::core::ffi::c_void,
+        b_type: rocblas_datatype,
+        ldb: rocblas_int,
+        beta: *const ::core::ffi::c_void,
+        C: *const ::core::ffi::c_void,
+        c_type: rocblas_datatype,
+        ldc: rocblas_int,
+        D: *mut ::core::ffi::c_void,
+        d_type: rocblas_datatype,
+        ldd: rocblas_int,
+        compute_type: rocblas_datatype,
+        geam_ex_op: rocblas_geam_ex_operation,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+trsm_ex solves:
+
+op(A)*X = alpha*B or X*op(A) = alpha*B,
+
+where alpha is a scalar, X and B are m by n matrices,
+A is triangular matrix and op(A) is one of
+
+op( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H.
+
+The matrix X is overwritten on B.
+
+This function gives the user the ability to reuse the invA matrix between runs.
+If invA == NULL, rocblas_trsm_ex will automatically calculate invA on every run.
+
+Setting up invA:
+The accepted invA matrix consists of the packed 128x128 inverses of the diagonal blocks of
+matrix A, followed by any smaller diagonal block that remains.
+To set up invA it is recommended that rocblas_trtri_batched be used with matrix A as the input.
+
+Device memory of size 128 x k should be allocated for invA ahead of time, where k is m when
+rocblas_side_left and is n when rocblas_side_right. The actual number of elements in invA
+should be passed as invA_size.
+
+To begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of
+matrix A. Below are the restricted parameters:
+- n = 128
+- ldinvA = 128
+- stride_invA = 128x128
+- batch_count = k / 128,
+
+Then any remaining block may be added:
+- n = k % 128
+- invA = invA + stride_invA * previous_batch_count
+- ldinvA = 128
+- batch_count = 1
+
+Although not widespread, some gemm kernels used by trsm_ex may use atomic operations.
+See Atomic Operations in the API Reference Guide for more information.
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+- rocblas_side_left:       op(A)*X = alpha*B
+- rocblas_side_right:      X*op(A) = alpha*B
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  A is an upper triangular matrix.
+- rocblas_fill_lower:  A is a lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     A is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  A is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of B. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of B. n >= 0.
+
+@param[in]
+alpha   [void *]
+device pointer or host pointer specifying the scalar alpha. When alpha is
+&zero then A is not referenced, and B need not be set before
+entry.
+
+@param[in]
+A       [void *]
+device pointer storing matrix A.
+of dimension ( lda, k ), where k is m
+when rocblas_side_left and
+is n when rocblas_side_right
+only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+if side = rocblas_side_right, lda >= max( 1, n ).
+
+@param[in, out]
+B       [void *]
+device pointer storing matrix B.
+B is of dimension ( ldb, n ).
+Before entry, the leading m by n part of the array B must
+contain the right-hand side matrix B, and on exit is
+overwritten by the solution matrix X.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of B. ldb >= max( 1, m ).
+
+@param[in]
+invA    [void *]
+device pointer storing the inverse diagonal blocks of A.
+invA is of dimension ( ld_invA, k ), where k is m
+when rocblas_side_left and
+is n when rocblas_side_right.
+ld_invA must be equal to 128.
+
+@param[in]
+invA_size [rocblas_int]
+invA_size specifies the number of elements of device memory in invA.
+
+@param[in]
+compute_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_trsm_ex(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        A: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        B: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+        invA: *const ::core::ffi::c_void,
+        invA_size: rocblas_int,
+        compute_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+trsm_batched_ex solves:
+
+op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i,
+
+for i = 1, ..., batch_count; and where alpha is a scalar, X and B are arrays of m by n matrices,
+A is an array of triangular matrices and each op(A_i) is one of
+
+op( A_i ) = A_i   or   op( A_i ) = A_i^T   or   op( A_i ) = A_i^H.
+
+Each matrix X_i is overwritten on B_i.
+
+This function gives the user the ability to reuse the invA matrix between runs.
+If invA == NULL, rocblas_trsm_batched_ex will automatically calculate each invA_i on every run.
+
+Setting up invA:
+Each accepted invA_i matrix consists of the packed 128x128 inverses of the diagonal blocks of
+matrix A_i, followed by any smaller diagonal block that remains.
+To set up each invA_i it is recommended that rocblas_trtri_batched be used with matrix A_i as the input.
+invA is an array of pointers of batch_count length holding each invA_i.
+
+Device memory of size 128 x k should be allocated for each invA_i ahead of time, where k is m when
+rocblas_side_left and is n when rocblas_side_right. The actual number of elements in each invA_i
+should be passed as invA_size.
+
+To begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of each
+matrix A_i. Below are the restricted parameters:
+- n = 128
+- ldinvA = 128
+- stride_invA = 128x128
+- batch_count = k / 128,
+
+Then any remaining block may be added:
+- n = k % 128
+- invA = invA + stride_invA * previous_batch_count
+- ldinvA = 128
+- batch_count = 1
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+- rocblas_side_left:       op(A)*X = alpha*B
+- rocblas_side_right:      X*op(A) = alpha*B
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  each A_i is an upper triangular matrix.
+- rocblas_fill_lower:  each A_i is a lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of each B_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of each B_i. n >= 0.
+
+@param[in]
+alpha   [void *]
+device pointer or host pointer specifying the scalar alpha. When alpha is
+&zero then A is not referenced, and B need not be set before
+entry.
+
+@param[in]
+A       [void *]
+device array of device pointers storing each matrix A_i.
+each A_i is of dimension ( lda, k ), where k is m
+when rocblas_side_left and
+is n when rocblas_side_right
+only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of each A_i.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+if side = rocblas_side_right, lda >= max( 1, n ).
+
+@param[in, out]
+B       [void *]
+device array of device pointers storing each matrix B_i.
+each B_i is of dimension ( ldb, n ).
+Before entry, the leading m by n part of the array B_i must
+contain the right-hand side matrix B_i, and on exit is
+overwritten by the solution matrix X_i.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of each B_i. ldb >= max( 1, m ).
+
+@param[in]
+batch_count [rocblas_int]
+specifies how many batches.
+
+@param[in]
+invA    [void *]
+device array of device pointers storing the inverse diagonal blocks of each A_i.
+each invA_i is of dimension ( ld_invA, k ), where k is m
+when rocblas_side_left and
+is n when rocblas_side_right.
+ld_invA must be equal to 128.
+
+@param[in]
+invA_size [rocblas_int]
+invA_size specifies the number of elements of device memory in each invA_i.
+
+@param[in]
+compute_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_trsm_batched_ex(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        A: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        B: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+        batch_count: rocblas_int,
+        invA: *const ::core::ffi::c_void,
+        invA_size: rocblas_int,
+        compute_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+trsm_strided_batched_ex solves:
+
+op(A_i)*X_i = alpha*B_i or X_i*op(A_i) = alpha*B_i,
+
+for i = 1, ..., batch_count; and where alpha is a scalar, X and B are strided batched m by n matrices,
+A is a strided batched triangular matrix and op(A_i) is one of
+
+op( A_i ) = A_i   or   op( A_i ) = A_i^T   or   op( A_i ) = A_i^H.
+
+Each matrix X_i is overwritten on B_i.
+
+This function gives the user the ability to reuse each invA_i matrix between runs.
+If invA == NULL, rocblas_trsm_strided_batched_ex will automatically calculate each invA_i on every run.
+
+Setting up invA:
+Each accepted invA_i matrix consists of the packed 128x128 inverses of the diagonal blocks of
+matrix A_i, followed by any smaller diagonal block that remains.
+To set up invA_i it is recommended that rocblas_trtri_batched be used with matrix A_i as the input.
+invA is a contiguous piece of memory holding each invA_i.
+
+Device memory of size 128 x k should be allocated for each invA_i ahead of time, where k is m when
+rocblas_side_left and is n when rocblas_side_right. The actual number of elements in each invA_i
+should be passed as invA_size.
+
+To begin, rocblas_trtri_batched must be called on the full 128x128-sized diagonal blocks of each
+matrix A_i. Below are the restricted parameters:
+- n = 128
+- ldinvA = 128
+- stride_invA = 128x128
+- batch_count = k / 128
+
+Then any remaining block may be added:
+- n = k % 128
+- invA = invA + stride_invA * previous_batch_count
+- ldinvA = 128
+- batch_count = 1
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+
+@param[in]
+side    [rocblas_side]
+- rocblas_side_left:       op(A)*X = alpha*B
+- rocblas_side_right:      X*op(A) = alpha*B
+
+@param[in]
+uplo    [rocblas_fill]
+- rocblas_fill_upper:  each A_i is an upper triangular matrix.
+- rocblas_fill_lower:  each A_i is a lower triangular matrix.
+
+@param[in]
+transA  [rocblas_operation]
+- rocblas_operation_none:    op(A) = A.
+- rocblas_operation_transpose:      op(A) = A^T
+- rocblas_operation_conjugate_transpose:  op(A) = A^H
+
+@param[in]
+diag    [rocblas_diagonal]
+- rocblas_diagonal_unit:     each A_i is assumed to be unit triangular.
+- rocblas_diagonal_non_unit:  each A_i is not assumed to be unit triangular.
+
+@param[in]
+m       [rocblas_int]
+m specifies the number of rows of each B_i. m >= 0.
+
+@param[in]
+n       [rocblas_int]
+n specifies the number of columns of each B_i. n >= 0.
+
+@param[in]
+alpha   [void *]
+device pointer or host pointer specifying the scalar alpha. When alpha is
+&zero then A is not referenced, and B need not be set before
+entry.
+
+@param[in]
+A       [void *]
+device pointer storing matrix A.
+of dimension ( lda, k ), where k is m
+when rocblas_side_left and
+is n when rocblas_side_right
+only the upper/lower triangular part is accessed.
+
+@param[in]
+lda     [rocblas_int]
+lda specifies the first dimension of A.
+
+if side = rocblas_side_left,  lda >= max( 1, m ),
+if side = rocblas_side_right, lda >= max( 1, n ).
+
+@param[in]
+stride_A [rocblas_stride]
+The stride between each A matrix.
+
+@param[in, out]
+B       [void *]
+device pointer pointing to first matrix B_i.
+each B_i is of dimension ( ldb, n ).
+Before entry, the leading m by n part of each array B_i must
+contain the right-hand side of matrix B_i, and on exit is
+overwritten by the solution matrix X_i.
+
+@param[in]
+ldb    [rocblas_int]
+ldb specifies the first dimension of each B_i. ldb >= max( 1, m ).
+
+@param[in]
+stride_B [rocblas_stride]
+The stride between each B_i matrix.
+
+@param[in]
+batch_count [rocblas_int]
+specifies how many batches.
+
+@param[in]
+invA    [void *]
+device pointer storing the inverse diagonal blocks of each A_i.
+invA points to the first invA_1.
+each invA_i is of dimension ( ld_invA, k ), where k is m
+when rocblas_side_left and
+is n when rocblas_side_right.
+ld_invA must be equal to 128.
+
+@param[in]
+invA_size [rocblas_int]
+invA_size specifies the number of elements of device memory in each invA_i.
+
+@param[in]
+stride_invA [rocblas_stride]
+The stride between each invA matrix.
+
+@param[in]
+compute_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_trsm_strided_batched_ex(
+        handle: rocblas_handle,
+        side: rocblas_side,
+        uplo: rocblas_fill,
+        transA: rocblas_operation,
+        diag: rocblas_diagonal,
+        m: rocblas_int,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        A: *const ::core::ffi::c_void,
+        lda: rocblas_int,
+        stride_A: rocblas_stride,
+        B: *mut ::core::ffi::c_void,
+        ldb: rocblas_int,
+        stride_B: rocblas_stride,
+        batch_count: rocblas_int,
+        invA: *const ::core::ffi::c_void,
+        invA_size: rocblas_int,
+        stride_invA: rocblas_stride,
+        compute_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+axpy_ex   computes constant alpha multiplied by vector x, plus vector y.
+
+y := alpha * x + y
+
+Currently supported datatypes are as follows:
+
+-------------------------------------------------
+| alpha_type | x_type | y_type | execution_type |
+|------------|--------|--------|----------------|
+|  bf16_r    | bf16_r |  bf16_r|      f32_r     |
+|  f32_r     | bf16_r |  bf16_r|      f32_r     |
+|  f16_r     | f16_r  |  f16_r |      f16_r     |
+|  f16_r     | f16_r  |  f16_r |      f32_r     |
+|  f32_r     | f16_r  |  f16_r |      f32_r     |
+|  f32_r     | f32_r  |  f32_r |      f32_r     |
+|  f64_r     | f64_r  |  f64_r |      f64_r     |
+|  f32_c     | f32_c  |  f32_c |      f32_c     |
+|  f64_c     | f64_c  |  f64_c |      f64_c     |
+-------------------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x and y.
+@param[in]
+alpha     device pointer or host pointer to specify the scalar alpha.
+@param[in]
+alpha_type [rocblas_datatype]
+specifies the datatype of alpha.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+y         device pointer storing vector y.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_axpy_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** 64-bit variant of `rocblas_axpy_ex`: same parameter list, with `n`, `incx` and `incy` passed as `i64`. */
+    pub fn rocblas_axpy_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+axpy_batched_ex   computes constant alpha multiplied by vector x, plus vector y over
+a set of batched vectors.
+
+y := alpha * x + y
+
+Currently supported datatypes are as follows:
+
+-------------------------------------------------
+| alpha_type | x_type | y_type | execution_type |
+|------------|--------|--------|----------------|
+|  bf16_r    | bf16_r |  bf16_r|      f32_r     |
+|  f32_r     | bf16_r |  bf16_r|      f32_r     |
+|  f16_r     | f16_r  |  f16_r |      f16_r     |
+|  f16_r     | f16_r  |  f16_r |      f32_r     |
+|  f32_r     | f16_r  |  f16_r |      f32_r     |
+|  f32_r     | f32_r  |  f32_r |      f32_r     |
+|  f64_r     | f64_r  |  f64_r |      f64_r     |
+|  f32_c     | f32_c  |  f32_c |      f32_c     |
+|  f64_c     | f64_c  |  f64_c |      f64_c     |
+-------------------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in]
+alpha     device pointer or host pointer to specify the scalar alpha.
+@param[in]
+alpha_type [rocblas_datatype]
+specifies the datatype of alpha.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in, out]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_axpy_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** 64-bit variant of `rocblas_axpy_batched_ex`: same parameter list, with `n`, `incx`, `incy` and `batch_count` passed as `i64`. */
+    pub fn rocblas_axpy_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+axpy_strided_batched_ex   computes constant alpha multiplied by vector x, plus vector y over
+a set of strided batched vectors.
+
+y := alpha * x + y
+
+Currently supported datatypes are as follows:
+
+-------------------------------------------------
+| alpha_type | x_type | y_type | execution_type |
+|------------|--------|--------|----------------|
+|  bf16_r    | bf16_r |  bf16_r|      f32_r     |
+|  f32_r     | bf16_r |  bf16_r|      f32_r     |
+|  f16_r     | f16_r  |  f16_r |      f16_r     |
+|  f16_r     | f16_r  |  f16_r |      f32_r     |
+|  f32_r     | f16_r  |  f16_r |      f32_r     |
+|  f32_r     | f32_r  |  f32_r |      f32_r     |
+|  f64_r     | f64_r  |  f64_r |      f64_r     |
+|  f32_c     | f32_c  |  f32_c |      f32_c     |
+|  f64_c     | f64_c  |  f64_c |      f64_c     |
+-------------------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in]
+alpha     device pointer or host pointer to specify the scalar alpha.
+@param[in]
+alpha_type [rocblas_datatype]
+specifies the datatype of alpha.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stridex   [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical
+case this means stridex >= n * incx.
+@param[in, out]
+y         device pointer to the first vector y_1.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stridey   [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1).
+There are no restrictions placed on stridey. However, ensure that stridey is of appropriate size. For a typical
+case this means stridey >= n * incy.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_axpy_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stridey: rocblas_stride,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** 64-bit variant of `rocblas_axpy_strided_batched_ex`: same parameter list, with `n`, `incx`, `incy` and `batch_count` passed as `i64` (strides stay `rocblas_stride`). */
+    pub fn rocblas_axpy_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stridex: rocblas_stride,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stridey: rocblas_stride,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+dot_ex  performs the dot product of vectors x and y.
+
+result = x * y;
+
+dotc_ex  performs the dot product of the conjugate of complex vector x and complex vector y
+
+result = conjugate (x) * y;
+
+Currently supported datatypes are as follows:
+
+--------------------------------------------------
+| x_type | y_type | result_type | execution_type |
+|--------|--------|-------------|----------------|
+| f16_r  | f16_r  |    f16_r    |     f16_r      |
+| f16_r  | f16_r  |    f16_r    |     f32_r      |
+| bf16_r | bf16_r |    bf16_r   |     f32_r      |
+| f32_r  | f32_r  |    f32_r    |     f32_r      |
+| f32_r  | f32_r  |    f64_r    |     f64_r      |
+| f64_r  | f64_r  |    f64_r    |     f64_r      |
+| f32_c  | f32_c  |    f32_c    |     f32_c      |
+| f64_c  | f64_c  |    f64_c    |     f64_c      |
+--------------------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x and y.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+y         device pointer storing vector y.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of vector y.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of y.
+@param[in, out]
+result
+device pointer or host pointer to store the dot product.
+return is 0.0 if n <= 0.
+@param[in]
+result_type [rocblas_datatype]
+specifies the datatype of the result.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_dot_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** Conjugated counterpart of `rocblas_dot_ex` (`result = conjugate (x) * y`); same parameter list, documented on `rocblas_dot_ex`. */
+    pub fn rocblas_dotc_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** 64-bit variant of `rocblas_dot_ex`: same parameter list, with `n`, `incx` and `incy` passed as `i64`. */
+    pub fn rocblas_dot_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** 64-bit variant of `rocblas_dotc_ex`: same parameter list, with `n`, `incx` and `incy` passed as `i64`. */
+    pub fn rocblas_dotc_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+dot_batched_ex performs a batch of dot products of vectors x and y.
+
+result_i = x_i * y_i;
+
+dotc_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y
+
+result_i = conjugate (x_i) * y_i;
+
+where (x_i, y_i) is the i-th instance of the batch.
+x_i and y_i are vectors, for i = 1, ..., batch_count
+
+Currently supported datatypes are as follows:
+
+--------------------------------------------------
+| x_type | y_type | result_type | execution_type |
+|--------|--------|-------------|----------------|
+| f16_r  | f16_r  |    f16_r    |     f16_r      |
+| f16_r  | f16_r  |    f16_r    |     f32_r      |
+| bf16_r | bf16_r |    bf16_r   |     f32_r      |
+| f32_r  | f32_r  |    f32_r    |     f32_r      |
+| f32_r  | f32_r  |    f64_r    |     f64_r      |
+| f64_r  | f64_r  |    f64_r    |     f64_r      |
+| f32_c  | f32_c  |    f32_c    |     f32_c      |
+| f64_c  | f64_c  |    f64_c    |     f64_c      |
+--------------------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+y         device array of device pointers storing each vector y_i.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in, out]
+result
+device array or host array of batch_count size to store the dot products of each batch.
+return 0.0 for each element if n <= 0.
+@param[in]
+result_type [rocblas_datatype]
+specifies the datatype of the result.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_dot_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** Conjugated counterpart of `rocblas_dot_batched_ex` (`result_i = conjugate (x_i) * y_i`); same parameter list, documented on `rocblas_dot_batched_ex`. */
+    pub fn rocblas_dotc_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        batch_count: rocblas_int,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use] /** 64-bit variant of `rocblas_dot_batched_ex`: same parameter list, with `n`, `incx`, `incy` and `batch_count` passed as `i64`. */
+    pub fn rocblas_dot_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        batch_count: i64,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_dotc_batched_ex: n, incx, incy and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dotc_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        batch_count: i64,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+dot_strided_batched_ex  performs a batch of dot products of vectors x and y.
+
+result_i = x_i * y_i;
+
+dotc_strided_batched_ex  performs a batch of dot products of the conjugate of complex vector x and complex vector y
+
+result_i = conjugate (x_i) * y_i;
+
+where (x_i, y_i) is the i-th instance of the batch.
+x_i and y_i are vectors, for i = 1, ..., batch_count
+
+Currently supported datatypes are as follows:
+
+--------------------------------------------------
+| x_type | y_type | result_type | execution_type |
+|--------|--------|-------------|----------------|
+| f16_r  | f16_r  |    f16_r    |     f16_r      |
+| f16_r  | f16_r  |    f16_r    |     f32_r      |
+| bf16_r | bf16_r |    bf16_r   |     f32_r      |
+| f32_r  | f32_r  |    f32_r    |     f32_r      |
+| f32_r  | f32_r  |    f64_r    |     f64_r      |
+| f64_r  | f64_r  |    f64_r    |     f64_r      |
+| f32_c  | f32_c  |    f32_c    |     f32_c      |
+| f64_c  | f64_c  |    f64_c    |     f64_c      |
+--------------------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i and y_i.
+@param[in]
+x         device pointer to the first vector (x_1) in the batch.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stride_x    [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1)
+@param[in]
+y         device pointer to the first vector (y_1) in the batch.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of each vector y_i.
+@param[in]
+incy      [rocblas_int]
+specifies the increment for the elements of each y_i.
+@param[in]
+stride_y    [rocblas_stride]
+stride from the start of one vector (y_i) to the next one (y_i+1)
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in, out]
+result
+device array or host array of batch_count size to store the dot products of each batch.
+return 0.0 for each element if n <= 0.
+@param[in]
+result_type [rocblas_datatype]
+specifies the datatype of the result.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_dot_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // dotc sibling of rocblas_dot_strided_batched_ex (conjugates x, per that function's docs); same parameter list.
+    #[must_use]
+    pub fn rocblas_dotc_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        batch_count: rocblas_int,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_dot_strided_batched_ex: n, incx, incy and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dot_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_dotc_strided_batched_ex: n, incx, incy and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_dotc_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *const ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stride_y: rocblas_stride,
+        batch_count: i64,
+        result: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief BLAS_EX API
+
+\details
+nrm2_ex computes the euclidean norm of a real or complex vector.
+
+result := sqrt( x'*x ) for real vectors
+result := sqrt( x**H*x ) for complex vectors
+
+Currently supported datatypes are as follows:
+
+-------------------------------------
+|  x_type | result | execution_type |
+|---------|--------|----------------|
+|  bf16_r |  bf16_r|     f32_r      |
+|  f16_r  |  f16_r |     f32_r      |
+|  f32_r  |  f32_r |     f32_r      |
+|  f64_r  |  f64_r |     f64_r      |
+|  f32_c  |  f32_r |     f32_r      |
+|  f64_c  |  f64_r |     f64_r      |
+-------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+x         device pointer storing vector x.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of the vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in, out]
+results
+device pointer or host pointer to store the nrm2 product.
+return is 0.0 if n <= 0 or incx <= 0.
+@param[in]
+result_type [rocblas_datatype]
+specifies the datatype of the result.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_nrm2_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        results: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_nrm2_ex: n and incx are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_nrm2_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        results: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief BLAS_EX API
+
+\details
+nrm2_batched_ex computes the euclidean norm over a batch of real or complex vectors.
+
+result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count
+result := sqrt( x_i**H*x_i ) for complex vectors x, for i = 1, ..., batch_count
+
+Currently supported datatypes are as follows:
+
+-------------------------------------
+|  x_type | result | execution_type |
+|---------|--------|----------------|
+|  bf16_r |  bf16_r|     f32_r      |
+|  f16_r  |  f16_r |     f32_r      |
+|  f32_r  |  f32_r |     f32_r      |
+|  f64_r  |  f64_r |     f64_r      |
+|  f32_c  |  f32_r |     f32_r      |
+|  f64_c  |  f64_r |     f64_r      |
+-------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each x_i.
+@param[in]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+results
+device pointer or host pointer to array of batch_count size for nrm2 results.
+return is 0.0 for each element if n <= 0 or incx <= 0.
+@param[in]
+result_type [rocblas_datatype]
+specifies the datatype of the result.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_nrm2_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        results: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_nrm2_batched_ex: n, incx and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_nrm2_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        batch_count: i64,
+        results: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief BLAS_EX API
+
+\details
+nrm2_strided_batched_ex computes the euclidean norm over a batch of real or complex vectors.
+
+result := sqrt( x_i'*x_i ) for real vectors x, for i = 1, ..., batch_count
+result := sqrt( x_i**H*x_i ) for complex vectors, for i = 1, ..., batch_count
+
+Currently supported datatypes are as follows:
+
+-------------------------------------
+|  x_type | result | execution_type |
+|---------|--------|----------------|
+|  bf16_r |  bf16_r|     f32_r      |
+|  f16_r  |  f16_r |     f32_r      |
+|  f32_r  |  f32_r |     f32_r      |
+|  f64_r  |  f64_r |     f64_r      |
+|  f32_c  |  f32_r |     f32_r      |
+|  f64_c  |  f64_r |     f64_r      |
+-------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+number of elements in each x_i.
+@param[in]
+x         device pointer to the first vector x_1.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i. incx must be > 0.
+@param[in]
+stride_x  [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stride_x. However, ensure that stride_x is of appropriate size. For a typical
+case this means stride_x >= n * incx.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[out]
+results
+device pointer or host pointer to array for storing contiguous batch_count results.
+return is 0.0 for each element if n <= 0 or incx <= 0.
+@param[in]
+result_type [rocblas_datatype]
+specifies the datatype of the result.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_nrm2_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        batch_count: rocblas_int,
+        results: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_nrm2_strided_batched_ex: n, incx and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_nrm2_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *const ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        batch_count: i64,
+        results: *mut ::core::ffi::c_void,
+        result_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+rot_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to vectors x and y.
+Scalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+In the case where cs_type is real:
+
+x := c * x + s * y
+y := c * y - s * x
+
+In the case where cs_type is complex, the imaginary part of c is ignored:
+
+x := real(c) * x + s * y
+y := real(c) * y - conj(s) * x
+
+Currently supported datatypes are as follows:
+
+------------------------------------------------
+|  x_type | y_type  | cs_type | execution_type |
+|---------|---------|---------|----------------|
+|  bf16_r |  bf16_r | bf16_r  |  f32_r         |
+|  f16_r  |  f16_r  | f16_r   |  f32_r         |
+|  f32_r  |  f32_r  | f32_r   |  f32_r         |
+|  f64_r  |  f64_r  | f64_r   |  f64_r         |
+|  f32_c  |  f32_c  | f32_c   |  f32_c         |
+|  f32_c  |  f32_c  | f32_r   |  f32_c         |
+|  f64_c  |  f64_c  | f64_c   |  f64_c         |
+|  f64_c  |  f64_c  | f64_r   |  f64_c         |
+------------------------------------------------
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in the x and y vectors.
+@param[in, out]
+x       device pointer storing vector x.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of vector x.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of x.
+@param[in, out]
+y       device pointer storing vector y.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of vector y.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of y.
+@param[in]
+c       device pointer or host pointer storing scalar cosine component of the rotation matrix.
+@param[in]
+s       device pointer or host pointer storing scalar sine component of the rotation matrix.
+@param[in]
+cs_type [rocblas_datatype]
+specifies the datatype of c and s.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_rot_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        c: *const ::core::ffi::c_void,
+        s: *const ::core::ffi::c_void,
+        cs_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_rot_ex: n, incx and incy are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_rot_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        c: *const ::core::ffi::c_void,
+        s: *const ::core::ffi::c_void,
+        cs_type: rocblas_datatype,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+rot_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to batched vectors x_i and y_i, for i = 1, ..., batch_count.
+Scalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+In the case where cs_type is real:
+
+x := c * x + s * y
+y := c * y - s * x
+
+In the case where cs_type is complex, the imaginary part of c is ignored:
+
+x := real(c) * x + s * y
+y := real(c) * y - conj(s) * x
+
+Currently supported datatypes are as follows:
+
+------------------------------------------------
+|  x_type | y_type  | cs_type | execution_type |
+|---------|---------|---------|----------------|
+|  bf16_r |  bf16_r | bf16_r  |  f32_r         |
+|  f16_r  |  f16_r  | f16_r   |  f32_r         |
+|  f32_r  |  f32_r  | f32_r   |  f32_r         |
+|  f64_r  |  f64_r  | f64_r   |  f64_r         |
+|  f32_c  |  f32_c  | f32_c   |  f32_c         |
+|  f32_c  |  f32_c  | f32_r   |  f32_c         |
+|  f64_c  |  f64_c  | f64_c   |  f64_c         |
+|  f64_c  |  f64_c  | f64_r   |  f64_c         |
+------------------------------------------------
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in each x_i and y_i vectors.
+@param[in, out]
+x       device array of device pointers storing each vector x_i.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of each x_i.
+@param[in, out]
+y       device array of device pointers storing each vector y_i.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of each vector y_i.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of each y_i.
+@param[in]
+c       device pointer or host pointer to scalar cosine component of the rotation matrix.
+@param[in]
+s       device pointer or host pointer to scalar sine component of the rotation matrix.
+@param[in]
+cs_type [rocblas_datatype]
+specifies the datatype of c and s.
+@param[in]
+batch_count [rocblas_int]
+the number of x and y arrays, the number of batches.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_rot_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        c: *const ::core::ffi::c_void,
+        s: *const ::core::ffi::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_rot_batched_ex: n, incx, incy and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_rot_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        c: *const ::core::ffi::c_void,
+        s: *const ::core::ffi::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+rot_strided_batched_ex applies the Givens rotation matrix defined by c=cos(alpha) and s=sin(alpha) to strided batched vectors x_i and y_i, for i = 1, ..., batch_count.
+Scalars c and s may be stored in either host or device memory. Location is specified by calling rocblas_set_pointer_mode.
+
+In the case where cs_type is real:
+
+x := c * x + s * y
+y := c * y - s * x
+
+In the case where cs_type is complex, the imaginary part of c is ignored:
+
+x := real(c) * x + s * y
+y := real(c) * y - conj(s) * x
+
+Currently supported datatypes are as follows:
+
+------------------------------------------------
+|  x_type | y_type  | cs_type | execution_type |
+|---------|---------|---------|----------------|
+|  bf16_r |  bf16_r | bf16_r  |  f32_r         |
+|  f16_r  |  f16_r  | f16_r   |  f32_r         |
+|  f32_r  |  f32_r  | f32_r   |  f32_r         |
+|  f64_r  |  f64_r  | f64_r   |  f64_r         |
+|  f32_c  |  f32_c  | f32_c   |  f32_c         |
+|  f32_c  |  f32_c  | f32_r   |  f32_c         |
+|  f64_c  |  f64_c  | f64_c   |  f64_c         |
+|  f64_c  |  f64_c  | f64_r   |  f64_c         |
+------------------------------------------------
+
+@param[in]
+handle  [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n       [rocblas_int]
+number of elements in each x_i and y_i vectors.
+@param[in, out]
+x       device pointer to the first vector x_1.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx    [rocblas_int]
+specifies the increment between elements of each x_i.
+@param[in]
+stride_x [rocblas_stride]
+specifies the increment from the beginning of x_i to the beginning of x_(i+1)
+@param[in, out]
+y       device pointer to the first vector y_1.
+@param[in]
+y_type [rocblas_datatype]
+specifies the datatype of each vector y_i.
+@param[in]
+incy    [rocblas_int]
+specifies the increment between elements of each y_i.
+@param[in]
+stride_y [rocblas_stride]
+specifies the increment from the beginning of y_i to the beginning of y_(i+1)
+@param[in]
+c       device pointer or host pointer to scalar cosine component of the rotation matrix.
+@param[in]
+s       device pointer or host pointer to scalar sine component of the rotation matrix.
+@param[in]
+cs_type [rocblas_datatype]
+specifies the datatype of c and s.
+@param[in]
+batch_count [rocblas_int]
+the number of x and y arrays, the number of batches.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_rot_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stride_x: rocblas_stride,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: rocblas_int,
+        stride_y: rocblas_stride,
+        c: *const ::core::ffi::c_void,
+        s: *const ::core::ffi::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_rot_strided_batched_ex: n, incx, incy and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_rot_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stride_x: rocblas_stride,
+        y: *mut ::core::ffi::c_void,
+        y_type: rocblas_datatype,
+        incy: i64,
+        stride_y: rocblas_stride,
+        c: *const ::core::ffi::c_void,
+        s: *const ::core::ffi::c_void,
+        cs_type: rocblas_datatype,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+scal_ex  scales each element of vector x with scalar alpha.
+
+x := alpha * x
+
+Currently supported datatypes are as follows:
+
+----------------------------------------
+| alpha_type | x_type | execution_type |
+|------------|--------|----------------|
+|  f32_r     | bf16_r |     f32_r      |
+|  bf16_r    | bf16_r |     f32_r      |
+|  f16_r     | f16_r  |     f16_r      |
+|  f16_r     | f16_r  |     f32_r      |
+|  f32_r     | f16_r  |     f32_r      |
+|  f32_r     | f32_r  |     f32_r      |
+|  f64_r     | f64_r  |     f64_r      |
+|  f32_c     | f32_c  |     f32_c      |
+|  f64_c     | f64_c  |     f64_c      |
+|  f32_r     | f32_c  |     f32_c      |
+|  f64_r     | f64_c  |     f64_c      |
+----------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in x.
+@param[in]
+alpha     device pointer or host pointer for the scalar alpha.
+@param[in]
+alpha_type [rocblas_datatype]
+specifies the datatype of alpha.
+@param[in, out]
+x         device pointer storing vector x.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of vector x.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of x.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_scal_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_scal_ex: n and incx are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_scal_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+scal_batched_ex  scales each element of each vector x_i with scalar alpha.
+
+x_i := alpha * x_i
+
+Currently supported datatypes are as follows:
+
+----------------------------------------
+| alpha_type | x_type | execution_type |
+|------------|--------|----------------|
+|  f32_r     | bf16_r |     f32_r      |
+|  bf16_r    | bf16_r |     f32_r      |
+|  f16_r     | f16_r  |     f16_r      |
+|  f16_r     | f16_r  |     f32_r      |
+|  f32_r     | f16_r  |     f32_r      |
+|  f32_r     | f32_r  |     f32_r      |
+|  f64_r     | f64_r  |     f64_r      |
+|  f32_c     | f32_c  |     f32_c      |
+|  f64_c     | f64_c  |     f64_c      |
+|  f32_r     | f32_c  |     f32_c      |
+|  f64_r     | f64_c  |     f64_c      |
+----------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i.
+@param[in]
+alpha     device pointer or host pointer for the scalar alpha.
+@param[in]
+alpha_type [rocblas_datatype]
+specifies the datatype of alpha.
+@param[in, out]
+x         device array of device pointers storing each vector x_i.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+*/
+    pub fn rocblas_scal_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_scal_batched_ex: n, incx and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_scal_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** @{
+\brief <b> BLAS EX API </b>
+
+\details
+scal_strided_batched_ex  scales each element of vector x with scalar alpha over a set
+of strided batched vectors.
+
+x := alpha * x
+
+Currently supported datatypes are as follows:
+
+----------------------------------------
+| alpha_type | x_type | execution_type |
+|------------|--------|----------------|
+|  f32_r     | bf16_r |     f32_r      |
+|  bf16_r    | bf16_r |     f32_r      |
+|  f16_r     | f16_r  |     f16_r      |
+|  f16_r     | f16_r  |     f32_r      |
+|  f32_r     | f16_r  |     f32_r      |
+|  f32_r     | f32_r  |     f32_r      |
+|  f64_r     | f64_r  |     f64_r      |
+|  f32_c     | f32_c  |     f32_c      |
+|  f64_c     | f64_c  |     f64_c      |
+|  f32_r     | f32_c  |     f32_c      |
+|  f64_r     | f64_c  |     f64_c      |
+----------------------------------------
+
+@param[in]
+handle    [rocblas_handle]
+handle to the rocblas library context queue.
+@param[in]
+n         [rocblas_int]
+the number of elements in each x_i.
+@param[in]
+alpha     device pointer or host pointer for the scalar alpha.
+@param[in]
+alpha_type [rocblas_datatype]
+specifies the datatype of alpha.
+@param[in, out]
+x         device pointer to the first vector x_1.
+@param[in]
+x_type [rocblas_datatype]
+specifies the datatype of each vector x_i.
+@param[in]
+incx      [rocblas_int]
+specifies the increment for the elements of each x_i.
+@param[in]
+stridex   [rocblas_stride]
+stride from the start of one vector (x_i) to the next one (x_i+1).
+There are no restrictions placed on stridex. However, ensure that stridex is of appropriate size. For a typical
+case this means stridex >= n * incx.
+@param[in]
+batch_count [rocblas_int]
+number of instances in the batch.
+@param[in]
+execution_type [rocblas_datatype]
+specifies the datatype of computation.
+
+*/
+    pub fn rocblas_scal_strided_batched_ex(
+        handle: rocblas_handle,
+        n: rocblas_int,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: rocblas_int,
+        stridex: rocblas_stride,
+        batch_count: rocblas_int,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" { // i64 sibling of rocblas_scal_strided_batched_ex: n, incx and batch_count are i64 instead of rocblas_int.
+    #[must_use]
+    pub fn rocblas_scal_strided_batched_ex_64(
+        handle: rocblas_handle,
+        n: i64,
+        alpha: *const ::core::ffi::c_void,
+        alpha_type: rocblas_datatype,
+        x: *mut ::core::ffi::c_void,
+        x_type: rocblas_datatype,
+        incx: i64,
+        stridex: rocblas_stride,
+        batch_count: i64,
+        execution_type: rocblas_datatype,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    /** BLAS Auxiliary API
+
+\details
+rocblas_status_to_string
+
+Returns string representing rocblas_status value
+
+@param[in]
+status  [rocblas_status]
+rocBLAS status to convert to string*/
+    pub fn rocblas_status_to_string(
+        status: rocblas_status,
+    ) -> *const ::core::ffi::c_char;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    /** \brief Initialize rocBLAS on the current HIP device, to avoid costly startup time at the first call on that device.
+\details
+
+Calling `rocblas_initialize()` allows upfront initialization including device specific kernel setup.
+Otherwise this function is automatically called on the first function call that requires these initializations (mainly GEMM).
+*/
+    pub fn rocblas_initialize();
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief   Loads char* buf with the rocblas library version. size_t len
+is the maximum length of char* buf.
+\details
+
+@param[in, out]
+buf             pointer to buffer for version string
+
+@param[in]
+len             length of buf
+*/
+    pub fn rocblas_get_version_string(
+        buf: *mut ::core::ffi::c_char,
+        len: usize,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief   Queries the minimum buffer size for a successful call to
+\ref rocblas_get_version_string
+\details
+
+@param[out]
+len             pointer to size_t for storing the length
+*/
+    pub fn rocblas_get_version_string_size(len: *mut usize) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief
+\details
+Indicates that subsequent rocBLAS kernel calls should collect the optimal device memory size in bytes for their given kernel arguments
+and keep track of the maximum.
+Each kernel call can reuse temporary device memory on the same stream so the maximum is collected.
+Returns rocblas_status_size_query_mismatch if another size query is already in progress; returns rocblas_status_success otherwise
+@param[in]
+handle          rocblas handle*/
+    pub fn rocblas_start_device_memory_size_query(
+        handle: rocblas_handle,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief
+\details
+Stops collecting optimal device memory size information.
+Returns rocblas_status_size_query_mismatch if a collection is not underway; rocblas_status_invalid_handle if handle is nullptr;
+rocblas_status_invalid_pointer if size is nullptr; rocblas_status_success otherwise
+@param[in]
+handle          rocblas handle
+@param[out]
+size            maximum of the optimal sizes collected*/
+    pub fn rocblas_stop_device_memory_size_query(
+        handle: rocblas_handle,
+        size: *mut usize,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    pub fn rocblas_is_device_memory_size_query(handle: rocblas_handle) -> bool;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_set_optimal_device_memory_size_impl(
+        handle: rocblas_handle,
+        count: usize,
+        ...
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_device_malloc_alloc(
+        handle: rocblas_handle,
+        res: *mut *mut rocblas_device_malloc_base,
+        count: usize,
+        ...
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    pub fn rocblas_device_malloc_success(ptr: *mut rocblas_device_malloc_base) -> bool;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_device_malloc_ptr(
+        ptr: *mut rocblas_device_malloc_base,
+        res: *mut *mut ::core::ffi::c_void,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_device_malloc_get(
+        ptr: *mut rocblas_device_malloc_base,
+        index: usize,
+        res: *mut *mut ::core::ffi::c_void,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    pub fn rocblas_device_malloc_free(
+        ptr: *mut rocblas_device_malloc_base,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    pub fn rocblas_device_malloc_set_default_memory_size(size: usize);
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief
+\details
+Gets the current device memory size for the handle.
+Returns rocblas_status_invalid_handle if handle is nullptr; rocblas_status_invalid_pointer if size is nullptr; rocblas_status_success otherwise
+@param[in]
+handle          rocblas handle
+@param[out]
+size            current device memory size for the handle*/
+    pub fn rocblas_get_device_memory_size(
+        handle: rocblas_handle,
+        size: *mut usize,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief
+\details
+Changes the size of allocated device memory at runtime.
+
+Any previously allocated device memory managed by the handle is freed.
+
+If size > 0 sets the device memory size to the specified size (in bytes).
+If size == 0, frees the memory allocated so far, and lets rocBLAS manage device memory in the future, expanding it when necessary.
+Returns rocblas_status_invalid_handle if handle is nullptr; rocblas_status_invalid_pointer if size is nullptr; rocblas_status_success otherwise
+@param[in]
+handle          rocblas handle
+@param[in]
+size            size of allocated device memory*/
+    pub fn rocblas_set_device_memory_size(
+        handle: rocblas_handle,
+        size: usize,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    #[must_use]
+    /** \brief
+\details
+Sets the device workspace for the handle to use.
+
+Any previously allocated device memory managed by the handle is freed.
+
+Returns rocblas_status_invalid_handle if handle is nullptr; rocblas_status_success otherwise
+@param[in]
+handle          rocblas handle
+@param[in]
+addr            address of workspace memory
+@param[in]
+size            size of workspace memory
+*/
+    pub fn rocblas_set_workspace(
+        handle: rocblas_handle,
+        addr: *mut ::core::ffi::c_void,
+        size: usize,
+    ) -> rocblas_status;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    /** \brief
+\details
+Returns true when device memory in handle is managed by rocBLAS
+@param[in]
+handle          rocblas handle*/
+    pub fn rocblas_is_managing_device_memory(handle: rocblas_handle) -> bool;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    /** \brief
+\details
+Returns true when device memory in handle is managed by the user
+@param[in]
+handle          rocblas handle*/
+    pub fn rocblas_is_user_managing_device_memory(handle: rocblas_handle) -> bool;
+}
+#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]
+extern "C" {
+    pub fn rocblas_abort() -> !;
+}
+impl rocblas_error { // error codes 1..=15; every literal below is nonzero, so `new_unchecked` is sound
+    pub const r#invalid_handle: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(1)
+    });
+    pub const r#not_implemented: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(2)
+    });
+    pub const r#invalid_pointer: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(3)
+    });
+    pub const r#invalid_size: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(4)
+    });
+    pub const r#memory_error: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(5)
+    });
+    pub const r#internal_error: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(6)
+    });
+    pub const r#perf_degraded: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(7)
+    });
+    pub const r#size_query_mismatch: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(8)
+    });
+    pub const r#size_increased: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(9)
+    });
+    pub const r#size_unchanged: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(10)
+    });
+    pub const r#invalid_value: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(11)
+    });
+    pub const r#continue: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(12)
+    });
+    pub const r#check_numerics_fail: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(13)
+    });
+    pub const r#excluded_from_build: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(14)
+    });
+    pub const r#arch_mismatch: rocblas_error = rocblas_error(unsafe {
+        ::core::num::NonZeroU32::new_unchecked(15)
+    });
+}
+#[repr(transparent)] // same layout as the wrapped NonZeroU32
+#[derive(Debug, Hash, Copy, Clone, PartialEq, Eq)]
+pub struct rocblas_error(pub ::core::num::NonZeroU32);
+pub trait rocblas_statusConsts { // named constants: `success` is Ok(()), each `error_*` wraps the matching rocblas_error
+    const success: rocblas_status = rocblas_status::Ok(());
+    const error_invalid_handle: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#invalid_handle,
+    );
+    const error_not_implemented: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#not_implemented,
+    );
+    const error_invalid_pointer: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#invalid_pointer,
+    );
+    const error_invalid_size: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#invalid_size,
+    );
+    const error_memory_error: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#memory_error,
+    );
+    const error_internal_error: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#internal_error,
+    );
+    const error_perf_degraded: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#perf_degraded,
+    );
+    const error_size_query_mismatch: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#size_query_mismatch,
+    );
+    const error_size_increased: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#size_increased,
+    );
+    const error_size_unchanged: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#size_unchanged,
+    );
+    const error_invalid_value: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#invalid_value,
+    );
+    const error_continue: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#continue,
+    );
+    const error_check_numerics_fail: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#check_numerics_fail,
+    );
+    const error_excluded_from_build: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#excluded_from_build,
+    );
+    const error_arch_mismatch: rocblas_status = rocblas_status::Err(
+        rocblas_error::r#arch_mismatch,
+    );
+}
+impl rocblas_statusConsts for rocblas_status {} // empty impl: every constant uses the trait's default value
+#[must_use]
+pub type rocblas_status = ::core::result::Result<(), rocblas_error>; // Ok(()) on success, Err(nonzero code) otherwise
+const _: fn() = || { // compile-time layout check; the closure is never called
+    let _ = std::mem::transmute::<rocblas_status, u32>; // only type-checks if rocblas_status has the same size as u32
+};
+unsafe impl Send for rocblas_handle {} // NOTE(review): asserts a handle may move between threads — confirm against rocBLAS threading rules
+unsafe impl Sync for rocblas_handle {} // NOTE(review): asserts a handle may be shared across threads — confirm as above
diff --git a/zluda_bindgen/src/main.rs b/zluda_bindgen/src/main.rs
index cb35c14..db57515 100644
--- a/zluda_bindgen/src/main.rs
+++ b/zluda_bindgen/src/main.rs
@@ -28,6 +28,7 @@ fn main() {
         &crate_root,
         &["..", "ext", "hip_runtime-sys", "src", "lib.rs"],
     );
+    generate_rocblas(&crate_root, &["..", "ext", "rocblas-sys", "src", "lib.rs"]);
     let cuda_functions = generate_cuda(&crate_root);
     generate_process_address_table(&crate_root, cuda_functions);
     generate_ml(&crate_root);
@@ -655,6 +656,7 @@ fn remove_type(module: &mut syn::File, type_name: &str) {
     let items = items
         .into_iter()
         .filter_map(|item| match item {
+            Item::Type(type_) if type_.ident == type_name => None,
             Item::Enum(enum_) if enum_.ident == type_name => None,
             Item::Struct(struct_) if struct_.ident == type_name => None,
             Item::Impl(impl_) if impl_.self_ty.to_token_stream().to_string() == type_name => None,
@@ -921,6 +923,58 @@ fn generate_hip_runtime(output: &PathBuf, path: &[&str]) {
     write_rust_to_file(output, &prettyplease::unparse(&module))
 }
 
+/// Generates rocBLAS bindings from the installed header and writes them to `output` joined with `path`.
+fn generate_rocblas(output: &PathBuf, path: &[&str]) {
+    let rocblas_header = new_builder()
+        .header("/opt/rocm/include/rocblas/rocblas.h")
+        .allowlist_type("^rocblas.*")
+        .allowlist_function("^rocblas.*")
+        .allowlist_var("^rocblas.*")
+        .must_use_type("rocblas_status")
+        .constified_enum("rocblas_status_")
+        .new_type_alias("^rocblas_handle$")
+        .clang_args(["-I/opt/rocm/include", "-D__HIP_PLATFORM_AMD__"])
+        .generate()
+        .unwrap()
+        .to_string();
+    let mut module: syn::File = syn::parse_str(&rocblas_header).unwrap();
+    for hip_type in ["hipStream_t", "ihipStream_t", "hipEvent_t", "ihipEvent_t"] {
+        remove_type(&mut module, hip_type);
+    }
+    let mut converter = ConvertIntoRustResult {
+        type_: "rocblas_status",
+        underlying_type: "rocblas_status_",
+        new_error_type: "rocblas_error",
+        error_prefix: ("rocblas_status_", "error_"),
+        success: ("rocblas_status_success", "success"),
+        constants: Vec::new(),
+    };
+    module.items = module
+        .items
+        .into_iter()
+        .filter_map(|item| match item {
+            Item::Const(const_) => converter.get_const(const_).map(Item::Const),
+            Item::Use(use_) => converter.get_use(use_).map(Item::Use),
+            Item::Type(type_) => converter.get_type(type_).map(Item::Type),
+            Item::ForeignMod(mut extern_) => {
+                extern_.attrs.push(
+                    parse_quote!(#[cfg_attr(windows, link(name = "rocblas", kind = "raw-dylib"))]),
+                );
+                Some(Item::ForeignMod(extern_))
+            }
+            item => Some(item),
+        })
+        .collect::<Vec<_>>();
+    converter.flush(&mut module.items);
+    add_send_sync(&mut module.items, &["rocblas_handle"]);
+    let mut output = output.clone();
+    output.extend(path);
+    let text = &prettyplease::unparse(&module)
+        .replace("hipStream_t", "hip_runtime_sys::hipStream_t")
+        .replace("hipEvent_t", "hip_runtime_sys::hipEvent_t");
+    write_rust_to_file(output, text)
+}
+
 fn add_send_sync(items: &mut Vec<Item>, arg: &[&str]) {
     for type_ in arg {
         let type_ = Ident::new(type_, Span::call_site());
@@ -1115,7 +1169,7 @@ impl ConvertIntoRustResult {
                 let old_prefix_len = self.underlying_type.len() + 1 + self.error_prefix.0.len();
                 let variant_ident =
                     format_ident!("{}{}", self.error_prefix.1, &ident[old_prefix_len..]);
-                let error_ident = format_ident!("{}", &ident[old_prefix_len..]);
+                let error_ident = format_ident!("r#{}", &ident[old_prefix_len..]);
                 let expr = &const_.expr;
                 result_variants.push(quote! {
                     const #variant_ident: #type_ = #type_::Err(#new_error_type::#error_ident);